diff --git a/hpvm/test/dnn_benchmarks/benchmarks/.gitignore b/hpvm/test/dnn_benchmarks/benchmarks/.gitignore deleted file mode 100644 index 1708439d4e732070c5058aaf88d78a55313132e7..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -build/ -stats/ -output/ -std_output/ -profile_info_*.txt -profile_data.txt -psnr.txt -ssim.txt -*.sh -*.py diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt deleted file mode 100644 index 27ae10a43706557e20fc89f3aa13f9d427809778..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_confs_batch220.txt +++ /dev/null @@ -1,850 +0,0 @@ -+++++ -conf1 1 0 78.75 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf1 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf2 1.561580129 0 78.279991 0.7050135000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf3 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf4 1.62843286633 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf5 1.561580129 0 78.199997 0.8250045000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf6 1.5 0 78.840004 0.3099960000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf7 1.63231514248 0 78.180008 0.8549879999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 
gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf8 1.561580129 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf9 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf10 1.68452312305 0 78.55999 0.5900100000000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf11 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf12 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf13 1.58691558324 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf14 1.63231514248 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf15 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf16 1.58691558324 0 78.400009 0.5249865000000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf17 1.540499209 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf18 1.70550344452 0 77.979996 1.1550060000000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf19 1.60552156231 0 77.719994 1.5450090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf20 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf21 1.69337635738 0 77.599998 1.725003000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf22 1.69337635738 0 77.68 1.6049999999999898 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf23 1.70550344452 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf24 1.80498002224 0 77.479996 1.9050060000000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf25 1.7496306648 0 78.060005 1.0349924999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf26 1.80498002224 0 77.500008 1.874988000000009 -1 
gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf27 1.69337635738 0 77.639999 1.6650014999999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf28 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf29 1.81876478645 0 77.499992 1.875011999999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf30 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf31 1.81876478645 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf32 1.73138014145 0 77.879997 1.3050044999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf33 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf34 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf35 1.63231514248 0 77.939995 1.2150075000000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf36 1.7855629355 0 77.5 1.875 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf37 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf38 1.80498002224 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf39 1.69337635738 0 77.860001 1.3349985000000046 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf40 1.69337635738 0 77.659996 1.63500599999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf41 1.64654492165 0 77.219994 2.2950090000000003 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf42 1.70550344452 0 77.659996 1.63500599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf43 1.63426323052 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf44 1.85072010812 0 77.339996 2.115006000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ 
-+++++ -conf45 1.70763025603 0 76.860001 2.8349985000000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf46 1.75805416249 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf47 1.80736234275 0 76.759995 2.9850074999999947 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf48 1.70550344452 0 77.040001 2.5649984999999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf49 1.65600317448 0 77.619995 1.6950074999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf50 1.75498605481 0 77.180008 2.3549879999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf51 1.85825452695 0 77.340004 2.11499400000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf52 1.8290896189 0 77.199997 2.3250045000000057 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf53 1.68659789846 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf54 1.85072010812 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 
1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf55 1.65800824851 0 77.080002 2.50499700000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf56 1.98484848485 0 77.300003 2.1749954999999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf57 1.70550344452 0 77.379997 2.0550044999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf58 1.58875675284 0 76.819992 2.8950120000000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf59 1.85072010812 0 76.959999 2.6850015000000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf60 1.70550344452 0 77.259995 2.2350074999999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf61 1.90544418837 0 76.959999 2.6850015000000056 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf62 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf63 1.64654492165 0 76.779999 2.9550014999999945 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 33 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf64 1.7451710542 0 76.840004 2.86499400000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf65 1.80736234275 0 76.979996 2.655006 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf66 1.98484848485 0 77.520004 1.8449939999999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf67 1.90544418837 0 76.919998 2.74500299999999 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf68 1.62330389945 0 76.940002 2.7149969999999897 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf69 1.733572022 0 77.980003 1.1549955000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf70 1.82847903192 0 77.279999 2.2050014999999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf71 1.75805416249 0 76.880005 2.8049925000000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf72 1.79728066937 0 77.019997 2.5950044999999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf73 1.90544418837 0 76.780006 2.9549909999999997 -1 
gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf74 1.91812212738 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf75 1.80736234275 0 77.0 2.625 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf76 1.85072010812 0 77.139999 2.4150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf77 1.60036156459 0 77.479996 1.9050060000000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf78 1.69547301219 0 77.339996 2.115006000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf79 1.80736234275 0 77.340004 2.11499400000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf80 1.64654492165 0 76.900002 2.774996999999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf81 1.70550344452 0 76.940002 2.7149969999999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf82 1.70550344452 0 76.759995 2.9850074999999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu 
conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf83 1.75805416249 0 76.82 2.8950000000000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf84 1.91009546227 0 76.779991 2.955013500000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt deleted file mode 100644 index 31885ce6fb9f310898aaeb3df7bc4c021152d2d6..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ /dev/null @@ -1,380 +0,0 @@ -+++++ -conf1 1 0 78.75 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf1 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf2 1.561580129 0 78.279991 0.7050135000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf3 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf4 1.62843286633 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf5 1.561580129 0 78.199997 0.8250045000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 
gpu softmax fp16 1 ------ -+++++ -conf6 1.5 0 78.840004 0.3099960000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf7 1.63231514248 0 78.180008 0.8549879999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf8 1.561580129 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf9 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf10 1.68452312305 0 78.55999 0.5900100000000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf11 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf12 1.58691558324 0 78.480003 0.6699970000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf13 1.58691558324 0 78.400002 0.524996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf14 1.63231514248 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf15 1.58691558324 0 78.559998 0.5900020000000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf16 1.58691558324 0 78.400009 0.5249865000000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf17 1.540499209 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf18 1.70550344452 0 77.979996 1.1550060000000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf19 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf20 1.80498002224 0 77.479996 1.9050060000000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf21 1.7496306648 0 78.060005 1.0349924999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf22 1.80498002224 0 77.500008 1.874988000000009 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf23 1.7855629355 0 77.639999 1.6650014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf24 1.81876478645 0 77.499992 1.875011999999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 
pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf25 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf26 1.81876478645 0 77.580002 1.7549970000000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf27 1.73138014145 0 77.879997 1.3050044999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf28 1.7357694593 0 78.040001 1.0649984999999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf29 1.59477017142 0 78.139999 0.9150014999999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf30 1.7855629355 0 77.5 1.875 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf31 1.62843286633 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf32 1.80498002224 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf33 1.70550344452 0 77.659996 1.63500599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf34 1.98484848485 0 77.300003 2.1749954999999943 -1 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf35 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf36 1.98484848485 0 77.520004 1.8449939999999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf37 1.733572022 0 77.980003 1.1549955000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt deleted file mode 100644 index 76d3b9342feb04cdbfa5e97da3796c8e89226f13..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ /dev/null @@ -1,14260 +0,0 @@ -+++++ -conf1 1 0 78.75 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf1 1.79680900793 0 78.44000105 0.46499842499999033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf2 2.14833063686 0 78.422500275 0.49124958749999337 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf3 2.3343756992 0 78.279499775 0.7057503374999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf4 2.0051937949 0 78.307999725 0.6630004124999971 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf5 2.09572432924 0 78.17899945 0.8565008249999906 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf6 2.1510303661 0 78.341000325 0.613499512500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf7 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf8 2.40933283482 0 78.21199995 0.8070000749999906 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf9 2.44133377904 0 78.2895008 0.6907488000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf10 2.10377358491 0 78.520998975 0.6290010250000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf11 1.99206710006 0 78.244000225 0.7589996625000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf12 2.49016877877 0 78.264999025 0.7275014625000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf13 1.92297058901 0 78.354999825 0.5925002625000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf14 3.28260543861 0 78.11449965 0.9532505250000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf15 2.1510303661 0 78.34250065 0.6112490249999922 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf16 2.70433982991 0 77.53800065 1.8179990249999989 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf17 2.50154135982 0 78.228999325 0.7815010124999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf18 2.97747502888 0 78.058500175 1.037249737499991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf19 3.16135199481 0 77.658500425 1.6372493625000004 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf20 2.21882120614 0 78.016000725 1.1009989125000033 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf21 2.97779876864 0 77.5135 1.85475000000001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf22 2.36542510121 0 77.562499475 1.7812507875000065 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf23 3.84266973008 0 77.68899955 1.5915006749999918 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf24 2.83830787766 0 77.668499175 1.6222512375000093 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf25 2.97779876864 0 77.433499775 1.974750337499998 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf26 2.99538587737 0 77.850000275 1.349999587500001 -1 gpu conv samp 32 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf27 2.25834132162 0 78.0424999 1.0612501500000064 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf28 2.91566305143 0 77.9264999 1.235250150000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf29 3.45487014523 0 77.562000175 1.7819997375000085 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf30 2.98884796399 0 77.723499625 1.5397505624999965 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf31 3.59076159103 0 77.520999325 1.843501012499992 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf32 2.11760015464 0 77.5674998 1.7737503000000103 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf33 3.18710451669 0 77.66400005 1.6289999250000022 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf34 3.53240628779 0 77.666998925 1.6245016124999978 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf35 1.66455840456 0 77.519997 1.8450044999999946 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf36 3.72963656481 0 77.71250025 1.5562496249999924 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf37 3.26928242088 0 77.708 1.5630000000000024 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 
1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf38 2.91566305143 0 77.997500225 1.128749662500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf39 3.37566443263 0 77.764000275 1.4789995874999988 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf40 2.16857363436 0 77.986500425 1.145249362499996 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf41 3.45051734728 0 77.757000325 1.4894995124999895 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf42 2.84007388684 0 77.906500025 1.2652499625000004 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf43 2.5234379499 0 77.836999325 1.3695010125000096 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf44 1.99424293451 0 77.279999 2.2050014999999945 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf45 2.22694008233 0 77.5514999 1.797750150000006 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf46 2.51071449035 0 78.122998625 0.9405020625000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf47 3.70240971999 0 77.544499675 1.808250487500004 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf48 3.14863117051 0 77.64799975 1.6530003750000049 -1 gpu conv samp 32 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf49 3.542000097 0 78.033500025 1.0747499625000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf50 3.11638574781 0 77.679000075 1.606499887499993 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf51 3.24180661389 0 78.1409995 0.9135007499999901 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf52 3.59736474838 0 77.7224991 1.5412513500000102 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf53 2.15509270644 0 78.2715006 0.717749100000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf54 2.07523892514 0 77.4340001 1.9739998499999913 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf55 2.89104044336 0 77.61650025 1.7002496249999979 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf56 2.40299088586 0 77.75 1.5 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf57 2.57467948341 0 77.748999225 1.5015011624999914 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf58 3.4918718623 0 77.708 1.5630000000000024 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf59 2.41549528692 0 77.5825003 1.75124954999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add 
fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf60 3.11638574781 0 77.53900055 1.8164991750000041 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf61 3.59076159103 0 77.694499775 1.5832503375000044 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf62 3.45487014523 0 77.76799985 1.473000225000007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf63 3.4918718623 0 77.537499975 1.8187500374999956 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf64 1.80498002224 0 77.779999 1.4550014999999945 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf65 2.16857363436 0 77.798499675 1.4272504874999967 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf66 2.64786391927 0 77.61000005 1.7099999250000053 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf67 3.59736474838 0 77.7579992 1.4880011999999994 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf68 3.25529306887 0 77.687499775 1.5937503374999906 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf69 2.86173769935 0 77.427999275 1.9830010874999928 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf70 2.38769738778 0 77.39400115 2.033998275000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 
gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf71 4.89920842557 0 78.051000325 1.048499512499994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf72 4.79205397957 0 78.038000275 1.0679995874999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf73 3.09875539212 0 77.273500075 2.2147498874999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf74 3.31960879381 0 77.9119997 1.2570004500000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf75 4.19140856656 0 78.20000065 0.8249990249999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf76 3.37358534144 0 77.6074995 1.7137507499999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf77 2.85576960676 0 78.0505001 1.0492498500000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf78 2.81931594457 0 77.081999925 2.502000112499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf79 4.27983318032 0 78.214000325 0.803999512499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf80 2.53347556111 0 77.053999875 2.5440001874999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf81 3.44574192026 0 78.04700015 1.0544997749999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf82 3.84374835529 0 77.871499425 1.3177508625000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise 
swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf83 4.2459932947 0 78.186999875 0.8445001875000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf84 3.89292123452 0 77.7725 1.4662500000000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf85 3.84374835529 0 77.8894992 1.2907511999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf86 3.23567852554 0 78.021000575 1.0934991374999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf87 2.56431338585 0 78.252500825 0.7462487625000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf88 3.52603923588 0 78.007000675 1.1144989874999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf89 3.31960879381 0 77.615000125 1.7024998125000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf90 2.73868451644 0 76.974999125 2.662501312500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf91 2.73485576623 0 77.002500775 2.621248837499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf92 4.71776016882 0 78.118500375 0.9472494375000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf93 4.50808625337 0 78.1879999 0.8430001500000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf94 
2.4335243744 0 77.39600035 2.0309994750000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf95 2.28224185165 0 77.94599965 1.2060005249999932 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf96 4.69511411122 0 78.0050009 1.1174986500000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf97 4.52821122249 0 78.1555004 0.891749400000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf98 2.94070867727 0 77.411500725 2.0077489125000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf99 3.51112161664 0 77.876499375 1.3102509375000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf100 3.91238549312 0 77.8210001 1.3934998499999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf101 3.43063493306 0 77.99849985 1.1272502249999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf102 2.61175481887 0 77.0305003 2.57924955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf103 2.1510303661 0 78.399499725 0.5257504125000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf104 2.33079967234 0 77.677000425 1.6094993624999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf105 2.38915544409 0 77.3554993 2.091751049999992 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf106 3.18821757541 0 77.948999775 1.2015003374999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf107 2.38769738778 0 77.397999975 2.028000037499993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf108 3.30518638085 0 78.03400095 1.0739985749999903 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf109 3.18821757541 0 77.3555 2.0917499999999905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf110 3.93148091658 0 78.255500075 0.7417498874999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf111 3.47768289392 0 77.605998825 1.7160017624999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf112 4.35330525145 0 78.23450025 0.7732496250000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf113 2.16634367244 0 78.242499975 0.7612500374999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf114 4.49698282055 0 78.02350005 1.0897499250000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf115 3.76294547016 0 77.81649915 1.4002512750000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf116 3.10050944598 0 78.053000775 1.0454988374999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 
7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf117 3.76294547016 0 77.813500225 1.4047496625000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf118 2.24888375674 0 76.91599985 2.7510002249999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf119 2.90176147275 0 77.430000025 1.9799999625000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf120 4.36510071125 0 78.1990006 0.8264990999999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf121 2.73868451644 0 77.863499275 1.3297510875000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf122 2.14345461127 0 77.9280001 1.2329998499999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf123 2.58610616022 0 78.1555008 0.891748800000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf124 2.76355621145 0 77.811499725 1.4077504124999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf125 3.1705014109 0 76.864499725 2.8282504124999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf126 4.59661176964 0 77.974500725 1.163248912500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf127 3.46580033852 0 77.617499075 1.6987513875000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add 
fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf128 3.10050944598 0 77.380999775 2.0535003374999903 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf129 3.4869813633 0 77.989000375 1.141499437499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf130 4.82158183139 0 78.06699995 1.024500075000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf131 3.26850208106 0 77.3024992 2.1712512000000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf132 2.91566305143 0 77.826999475 1.3845007875000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf133 3.89292123452 0 77.9070005 1.2644992500000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf134 2.22332436958 0 77.196999925 2.3295001125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf135 3.96334183535 0 77.79850045 1.4272493249999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf136 4.27315546194 0 78.25700055 0.7394991749999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf137 1.60552156231 0 78.099998 0.975003000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf138 3.6515641406 0 77.886500175 1.2952497375000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf139 4.33470175092 0 78.1860006 0.8459991000000002 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf140 4.00390160999 0 78.03400095 1.0739985749999903 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf141 3.06923723471 0 77.48800035 1.8929994750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf142 2.51025423632 0 77.7379995 1.5180007499999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf143 2.2071979046 0 77.258999675 2.2365004875000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf144 2.24152130544 0 78.266000775 0.7259988375000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf145 1.73138014145 0 78.139999 0.9150014999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf146 2.07194193579 0 78.4889999 0.661000100000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf147 1.95369833697 0 78.272500375 0.7162494374999895 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf148 2.48204433972 0 78.247500575 0.7537491374999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf149 2.6038089013 0 78.27349985 0.7147502250000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ 
-conf150 2.28186154535 0 78.229499425 0.7807508624999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf151 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf152 1.82360897249 0 78.5300003 0.6199997000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf153 1.76439678846 0 78.203000575 0.8204991374999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf154 1.72755765819 0 78.1744998 0.86325029999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf155 2.10377358491 0 78.471999975 0.6780000249999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf156 2.89256716615 0 78.17850095 0.8572485749999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf157 2.18361621336 0 78.29599945 0.6810008250000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf158 1.561580129 0 78.119995 0.9450074999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf159 1.95369833697 0 78.40800075 0.512998875000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf160 2.77391276825 0 78.2810004 0.7034994000000054 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf161 2.10636242153 0 78.433500125 0.4747498125000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf162 1.73138014145 0 78.159996 0.88500599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf163 2.16634367244 0 78.23250085 0.7762487250000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf164 2.53160205673 0 78.22899975 0.7815003750000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf165 1.75142089738 0 78.1519998 0.897000300000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf166 3.59736474838 0 77.738998525 1.5165022125000007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf167 2.55188860547 0 78.203000475 0.8204992875000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf168 2.9094681628 0 77.8784995 1.3072507499999944 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf169 2.12252361347 0 78.12700005 0.9344999249999901 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf170 3.77643621697 0 77.486499575 1.8952506375000056 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu 
softmax fp16 1 ------ -+++++ -conf171 3.72963656481 0 77.683000175 1.6004997374999945 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf172 3.49811600913 0 77.6029999 1.7205001499999995 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf173 2.5234379499 0 77.871999325 1.3170010124999934 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf174 1.82360897249 0 78.489000675 0.6609993250000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf175 2.15086143425 0 77.46950055 1.9207491749999903 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf176 3.11638574781 0 77.4890009 1.8914986500000097 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf177 3.35910310464 0 77.72399955 1.539000674999997 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf178 2.60281905984 0 77.490499375 1.8892509375000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf179 2.24796080644 0 77.690000325 1.5899995125000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf180 3.25529306887 0 77.705499025 1.566751462500008 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf181 3.46799468161 0 77.7030001 1.5704998500000045 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf182 2.63411159335 
0 77.6099998 1.7100003000000044 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf183 2.99538587737 0 77.82599925 1.386001125000007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf184 3.44574192026 0 78.058 1.0379999999999896 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf185 3.33253479352 0 78.095999875 0.9810001874999941 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf186 3.49811600913 0 77.75849985 1.4872502249999897 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf187 2.35829610893 0 77.731499975 1.5277500374999917 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf188 3.04907663832 0 78.093499975 0.9847500374999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf189 2.99538587737 0 77.74999995 1.5000000749999955 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf190 3.542000097 0 78.0260006 1.085999099999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf191 3.28812300212 0 77.769999725 1.4700004124999921 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf192 2.87133870651 0 77.894500375 1.2832494374999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf193 2.73950635808 0 77.528499225 1.8322511624999933 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf194 2.91566305143 0 77.9804998 1.154250299999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf195 1.7977525785 0 78.559999475 0.5900005250000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf196 2.24152130544 0 78.173999375 0.8640009375000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf197 3.20989056068 0 78.09300135 0.9854979750000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf198 3.48564996739 0 78.1090001 0.9614998499999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf199 2.89256716615 0 78.160499175 0.8842512375000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf200 2.25834132162 0 78.08599985 0.9960002250000102 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf201 2.97779876864 0 77.515499675 1.8517504874999986 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf202 3.3268673034 0 78.0785003 1.0072495499999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf203 3.06580144126 0 77.74699955 1.5045006750000027 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf204 3.02884415997 0 78.163000075 0.880499887500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 
promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf205 2.15594095941 0 77.552500875 1.7962486874999897
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf206 3.66721064524 0 77.7285002 1.5322497000000013
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf207 2.11760015464 0 77.764000125 1.4789998125000068
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf208 2.59640338923 0 78.2045001 0.8182498499999937
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf209 3.59736474838 0 77.5884993 1.7422510500000072
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf210 2.52227594543 0 77.390499625 2.039250562499994
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf211 2.72018747284 0 77.93899955 1.2165006749999918
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf212 2.97779876864 0 77.454499975 1.9432500374999933
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf213 2.89104044336 0 77.838500175 1.3672497375000106
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf214 2.21882120614 0 78.002499375 1.121250937500001
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf215 2.55046272045 0 77.49200035 1.8869994750000032
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf216 3.16135199481 0 77.674499775 1.6132503374999985
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf217 2.6038089013 0 77.727500675 1.5337489874999974
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf218 3.77643621697 0 77.722000675 1.5419989874999942
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf219 2.74609889077 0 77.9474999 1.2037501500000047
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf220 3.39701846598 0 77.776500325 1.460249512499999
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf221 3.84266973008 0 77.48100015 1.9034997750000002
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf222 2.41443054714 0 77.704000325 1.5689995125000067
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf223 2.13476659554 0 78.07549955 1.0117506749999947
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf224 3.72963656481 0 77.4985005 1.8772492499999913
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf225 1.95369833697 0 77.791000025 1.4384999624999963
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf226 2.57467948341 0 77.77599975 1.4610003750000047
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf227 3.84266973008 0 77.6624994 1.6312508999999977
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf228 3.06580144126 0 77.5534998 1.794750300000004
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf229 2.25536569046 0 77.7035 1.569749999999992
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf230 2.41443054714 0 77.626499625 1.6852505625000092
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf231 3.24718776399 0 78.105000425 0.9674993624999999
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf232 3.28260543861 0 78.098500625 0.9772490625000003
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf233 2.43612195203 0 78.246499875 0.7552501875000033
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf234 3.13780880773 0 76.97750055 2.658749174999997
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv samp 34 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf235 3.3268673034 0 78.065999625 1.0260005624999948
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf236 2.56431338585 0 78.25149865 0.7477520249999969
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf237 4.58200012548 0 78.19099955 0.8385006749999988
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf238 4.82158183139 0 78.057500275 1.038749587500007
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf239 3.542000097 0 78.080500425 1.0042493625000048
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf240 3.78112865648 0 77.95499995 1.192500074999998
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf241 4.59198750865 0 78.1270008 0.934498799999993
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 promise swing_level 6
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf242 3.14537663121 0 78.0940003 0.983999549999993
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 7
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf243 3.52150537634 0 77.5499994 1.8000008999999935
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf244 3.13780880773 0 76.9555004 2.691749399999992
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv samp 34 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf245 4.44155567719 0 78.23850005 0.7672499250000016
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 6
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf246 3.53377364881 0 77.57250005 1.7662499249999968
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf247 3.45138702919 0 78.04249975 1.061250374999993
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 promise swing_level 6
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf248 2.68682935802 0 78.22300035 0.7904994749999901
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 7
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf249 3.48564996739 0 78.1284998 0.9322502999999998
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16
1 ------ -+++++ -conf250 3.06923723471 0 78.0770008 1.00949880000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf251 4.79877127275 0 78.10549985 0.9667502249999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf252 4.27315546194 0 78.240000375 0.7649994375000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf253 4.22633629753 0 78.162500175 0.8812497374999921 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf254 3.05997821259 0 78.132000325 0.9269995124999895 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf255 2.89256716615 0 78.13600105 0.9209984249999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf256 3.91238549312 0 77.926999475 1.2345007874999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf257 4.77617552809 0 78.092500275 0.9862495874999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf258 5.01218173084 0 78.113499925 0.9547501125000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf259 2.88586161931 0 78.1765001 0.8602498500000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf260 3.6350853616 0 77.53449935 1.8232509749999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf261 2.97747502888 0 78.116499725 0.9502504125000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ 
-conf262 2.28262228473 0 77.182000175 2.3519997375000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf263 3.06923723471 0 77.48500025 1.8974996250000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf264 3.51112161664 0 77.910499175 1.2592512375000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf265 2.85576960676 0 78.001500175 1.1227497374999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf266 2.48881101405 0 78.018000625 1.0979990625000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf267 4.60279195815 0 78.146000725 0.9059989125000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf268 2.12995187868 0 77.7119994 1.5570009000000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf269 3.13493539756 0 77.98199935 1.152000975000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf270 2.89378970586 0 78.0650002 1.0274997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf271 2.27126418908 0 77.665500325 1.6267495125000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf272 3.6583930271 0 77.962000625 1.1819990624999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf273 
3.44574192026 0 78.068000425 1.0229993625000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf274 4.37206912378 0 78.17450055 0.8632491749999929 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf275 3.22311222914 0 77.244499575 2.258250637499991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf276 4.19140856656 0 78.261499975 0.73275003749999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf277 2.72831704128 0 77.88249915 1.3012512749999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf278 2.87888443598 0 77.3114994 2.1577508999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf279 3.23567852554 0 78.033500475 1.0747492875000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf280 4.623773346 0 78.131000475 0.92849928750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf281 3.18636290921 0 77.2869997 2.1945004500000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf282 3.02884415997 0 78.127999625 0.9330005624999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf283 2.33079967234 0 77.693999825 1.5840002624999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf284 3.31800009086 0 77.2995003 2.175749549999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu 
softmax fp16 1 ------ -+++++ -conf285 2.54123316747 0 77.18099925 2.353501125000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf286 4.20879996926 0 78.112000075 0.9569998875000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf287 3.91238549312 0 77.849500425 1.3507493624999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf288 3.23567852554 0 77.448000525 1.9529992125000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf289 4.36510071125 0 78.22050075 0.794248875000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf290 2.24152130544 0 78.317000025 0.649499962500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf291 2.0941218638 0 78.053001275 1.0454980875000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf292 2.48971602595 0 77.795999775 1.4310003375000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf293 2.35323022394 0 77.025 2.5874999999999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf294 2.61724582801 0 78.211499625 0.8077505624999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf295 3.04365492811 0 77.019500125 2.59574981250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 
-7 gpu softmax fp16 1 ------ -+++++ -conf296 2.95498685009 0 77.960999875 1.1835001875000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf297 4.60279195815 0 78.144500275 0.9082495875000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf298 3.11780484681 0 77.582999275 1.7505010874999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf299 2.18431284582 0 78.0480003 1.0529995500000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf300 4.69591915468 0 78.0425009 1.0612486500000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf301 2.7974298081 0 77.64500005 1.6574999250000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf302 3.96334183535 0 77.769499775 1.4707503375000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf303 2.24630140357 0 77.273000925 2.2154986124999922 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf304 2.48971602595 0 77.8425003 1.3612495500000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf305 4.90008498647 0 78.0524994 1.0462508999999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf306 4.06752993595 0 78.22199955 0.7920006749999899 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf307 3.42634295097 0 77.590999925 1.7385001124999917 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf308 2.75993575613 0 78.089500225 0.9907496625000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf309 2.18361621336 0 78.335000675 0.6224989874999949 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf310 2.73376380311 0 78.257501 0.7387484999999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf311 2.10377358491 0 78.460000625 0.6899993749999936 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf312 2.44133377904 0 78.269500125 0.72074981250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf313 2.24152130544 0 78.299499075 0.6757513875000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf314 2.28186154535 0 78.26600015 0.7259997750000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf315 1.82360897249 0 78.52550125 0.6244987499999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf316 1.92405135546 0 78.307500225 0.6637496625000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf317 2.59394423726 0 78.3124998 0.6562503000000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 
-7 gpu softmax fp16 1 ------ -+++++ -conf318 3.28260543861 0 78.185500225 0.8467496625000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf319 2.40933283482 0 78.194999875 0.8325001875000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf320 1.95369833697 0 78.36700095 0.5744985749999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf321 2.53160205673 0 78.238499775 0.767250337500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf322 2.20630884648 0 77.766000125 1.4759998124999925 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf323 2.20630884648 0 77.511499375 1.8577509375000005 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf324 3.59076159103 0 77.6839995 1.5990007500000019 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf325 2.46129952706 0 77.57799985 1.7580002250000035 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf326 2.43612195203 0 78.212499475 0.806250787499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf327 2.97779876864 0 77.555500275 1.7917495875 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf328 3.35910310464 0 77.75199935 1.4970009749999917 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 
add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf329 2.51071449035 0 78.129999325 0.9300010125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf330 2.80373024506 0 77.904000875 1.2689986875000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf331 2.36440403604 0 78.2385001 0.767249850000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf332 2.11760015464 0 77.418999675 1.9965004875000076 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf333 2.9094681628 0 77.7544996 1.493250599999996 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf334 3.49811600913 0 77.752500125 1.4962498125000039 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf335 2.46129952706 0 77.653499775 1.6447503374999997 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf336 2.41230388109 0 77.85100005 1.3484999250000058 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf337 2.31738854514 0 77.7519991 1.4970013499999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf338 2.64786391927 0 77.538499825 1.8172502624999964 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf339 3.16135199481 0 77.70349975 1.569750374999991 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 
add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf340 2.52227594543 0 77.38650035 2.0452494749999914 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf341 2.55188860547 0 78.20650025 0.8152496249999928 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf342 1.96561700982 0 77.71699945 1.5495008249999955 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf343 3.84266973008 0 77.680000075 1.604999887500007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf344 2.70433982991 0 77.582999775 1.750500337499993 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf345 3.82389130321 0 77.602500675 1.7212489874999974 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf346 3.59736474838 0 77.740500675 1.5142489874999896 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf347 2.99342150206 0 77.7940007 1.433998950000003 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf348 2.81381236756 0 77.83299985 1.3755002250000103 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf349 1.80023417279 0 77.599998 1.725003000000001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf350 2.44133377904 0 78.330501225 0.6292481624999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 
1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf351 3.20613284165 0 78.161000225 0.8834996625000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf352 1.85888516523 0 77.180008 2.3549879999999987 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf353 3.84266973008 0 77.5400003 1.814999549999996 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf354 2.99342150206 0 77.521999425 1.8420008624999937 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf355 3.35910310464 0 77.586499975 1.745250037500007 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf356 3.45487014523 0 77.69100055 1.5884991750000026 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf357 3.16135199481 0 77.4395008 1.965748799999993 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf358 2.31347341745 0 77.3290006 2.131499099999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf359 1.952305636 0 78.136499725 0.9202504125000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf360 3.33253479352 0 78.1575004 0.8887493999999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf361 3.49811600913 0 77.593998925 1.734001612500009 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf362 3.02884415997 0 78.183499725 0.8497504124999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf363 1.69337635738 0 77.660004 1.634993999999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf364 3.44314270897 0 77.625999025 1.686001462500002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf365 3.59076159103 0 77.5799995 1.7550007500000007 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf366 2.73376380311 0 78.233000525 0.7754992125000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf367 1.99119809512 0 77.959000225 1.1864996625000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf368 1.99119809512 0 77.9495002 1.2007496999999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf369 2.91566305143 0 77.889000725 1.2914989124999963 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf370 1.89227879259 0 77.919000675 1.24649898749999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf371 2.52903050801 0 77.842999125 1.3605013124999914 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf372 2.9094681628 0 77.85900075 1.3364988749999895 
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf373 2.13476659554 0 78.0249998 1.087500299999995 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf374 2.20915635682 0 78.125500425 0.9367493625000023 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf375 3.24718776399 0 78.100999375 0.973500937499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf376 3.0068963151 0 78.096499225 0.9802511624999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf377 3.542000097 0 78.04100075 1.0634988750000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf378 2.19745749962 0 77.652999675 1.6455004874999943 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf379 2.99342150206 0 77.650999025 1.6485014624999934 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf380 1.9453718091 0 77.995001075 1.132498387499993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf381 3.70240971999 0 77.7160008 1.550998799999995 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf382 2.76271987895 0 77.802500325 1.4212495125000046 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf383 2.58317907967 0 77.9939991 1.1340013500000055 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf384 3.59736474838 0 77.74700055 1.5044991750000065 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf385 3.18636290921 0 77.23649915 2.2702512749999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf386 3.31960879381 0 77.9584999 1.18725014999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf387 3.89292123452 0 77.87799985 1.3080002250000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf388 3.17822794726 0 77.42349975 1.9897503749999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf389 3.29564085979 0 78.0885006 0.9922490999999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf390 3.08305776402 0 78.231000325 0.7784995125000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf391 3.70341103092 0 78.023501225 1.089748162499994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf392 3.35622156523 0 77.801499775 1.4227503375000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf393 3.27045362948 0 77.406000875 2.0159986874999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf394 2.28262228473 0 77.06800045 2.5229993250000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 
promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf395 4.44155567719 0 78.2120006 0.8069991000000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf396 2.56479367867 0 76.86149995 2.832750075000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf397 2.7524465773 0 78.206000125 0.8159998124999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf398 3.04365492811 0 77.0369993 2.5695010499999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf399 3.6583930271 0 77.957500375 1.1887494375000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf400 4.50808625337 0 78.127499175 0.9337512375000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf401 3.69591437391 0 77.770499875 1.469250187500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf402 2.07194193579 0 78.439500175 0.4657497374999906 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf403 2.18431284582 0 77.059500125 2.5357498125000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf404 4.59661176964 0 78.074500025 1.0132499624999909 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf405 3.1808118349 0 78.0770005 1.0094992500000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 
-7 gpu softmax fp16 1 ------ -+++++ -conf406 2.88343094032 0 77.0700001 2.519999849999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf407 2.28186154535 0 78.2985008 0.677248800000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf408 3.6515641406 0 77.918499575 1.2472506375000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf409 4.79205397957 0 78.042500725 1.0612489125000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf410 2.6262810908 0 77.943999425 1.2090008624999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf411 3.82870249017 0 77.869000425 1.3214993625000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf412 2.07823096988 0 77.89850065 1.2772490249999962 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf413 4.76952130091 0 78.0174999 1.0987501499999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf414 2.14731998471 0 77.153999725 2.3940004124999916 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf415 2.39521498147 0 78.326000725 0.6359989124999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf416 2.86173769935 0 77.4560001 1.9409998500000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add 
fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf417 3.96334183535 0 77.82450025 1.3882496250000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf418 2.28224185165 0 78.00700015 1.1144997750000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf419 1.67627481734 0 77.660004 1.634993999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf420 2.16634367244 0 78.299500075 0.6757498875000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf421 2.7974298081 0 77.60100005 1.7234999250000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf422 2.50108446251 0 78.310998725 0.6585019124999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf423 3.26850208106 0 77.2965002 2.1802497000000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf424 2.66150192539 0 77.7190008 1.5464987999999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf425 3.09350511825 0 77.018500075 2.5972498874999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf426 2.72831704128 0 78.066001 1.0259985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf427 3.52467785267 0 78.01100045 1.108499325000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf428 2.59271640731 0 77.485999275 1.8960010875000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf429 2.49493264402 0 78.2500004 0.749999399999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf430 2.85576960676 0 78.026000425 1.0859993624999902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf431 3.76294547016 0 77.828999675 1.3815004874999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf432 2.0941218638 0 77.1139997 2.4540004500000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf433 3.69591437391 0 77.8415009 1.3627486500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf434 1.9453718091 0 78.101999725 0.9720004125000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf435 3.89292123452 0 77.844500175 1.3582497375000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf436 5.09612029865 0 77.941999725 1.2120004124999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf437 2.89256716615 0 78.16550005 0.8767499249999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf438 4.71776016882 0 78.11850035 0.9472494749999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 
------ -+++++ -conf439 2.8298030352 0 77.880999375 1.3035009374999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf440 3.95190382569 0 78.191000375 0.8384994374999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf441 3.47768289392 0 77.62299965 1.6905005250000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf442 3.12527637116 0 78.017000175 1.0994997374999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf443 2.67371407651 0 77.793501225 1.4347481625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf444 3.21441146697 0 78.02099995 1.0935000749999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf445 3.76294547016 0 77.8419996 1.3620006000000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf446 1.93165087062 0 77.81500075 1.4024988750000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf447 2.51025423632 0 77.7459997 1.506000450000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf448 3.53377364881 0 77.5685011 1.772248349999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf449 2.8298030352 0 77.78100075 1.453498874999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf450 
2.22422719659 0 77.464500875 1.9282486875000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf451 3.09875539212 0 77.306499125 2.165251312499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf452 2.6974145891 0 77.7034999 1.5697501500000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf453 2.22332436958 0 77.232499975 2.276250037500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf454 4.98028095379 0 77.969999525 1.1700007124999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf455 2.13926887138 0 77.15200025 2.3969996249999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf456 2.28224185165 0 77.863500975 1.3297485375000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf457 2.52227594543 0 78.082000875 1.0019986874999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf458 2.17563676084 0 77.40750025 2.0137496250000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf459 3.01949394303 0 77.348998825 2.101501762500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf460 4.28586099646 0 78.075500675 1.011748987499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 
1 -7 gpu softmax fp16 1 ------ -+++++ -conf461 2.29948783073 0 77.73800035 1.5179994750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf462 2.1510303661 0 78.33949995 0.615750074999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf463 2.43612195203 0 78.2609999 0.7335001499999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf464 2.06569084995 0 78.2925 0.686249999999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf465 2.28186154535 0 78.222999975 0.79050003750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf466 1.95369833697 0 78.300999975 0.6735000375000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf467 1.95369833697 0 78.4525005 0.6974995000000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf468 2.67606536954 0 78.300500525 0.6742492124999941 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf469 1.88123041257 0 78.36049985 0.5842502250000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf470 2.70007394171 0 78.23150005 0.7777499250000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf471 2.47665743514 0 78.271999125 0.7170013125000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 
1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf472 1.561580129 0 78.240005 0.7649925000000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf473 1.95369833697 0 78.39050035 0.5392494750000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf474 3.16135199481 0 77.47150085 1.9177487250000027 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf475 2.48136969546 0 77.644499775 1.6582503375000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf476 3.63363848948 0 77.451000325 1.9484995125000069 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf477 3.39238987381 0 78.0874994 0.993750900000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf478 2.29428673152 0 77.837999875 1.3680001875000087 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf479 1.68618253461 0 78.6015003 0.5484997000000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf480 3.53240628779 0 77.46799955 1.9230006749999973 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf481 2.60281905984 0 77.489999525 1.8900007124999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf482 2.71748837209 0 78.0590003 1.0364995500000092 
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf483 3.45051734728 0 77.798499675 1.4272504874999967 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf484 2.89409550228 0 77.4129992 2.0055011999999977 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf485 3.53925369518 0 77.535499825 1.8217502624999966 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf486 2.98396322778 0 77.8309997 1.37850044999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf487 2.25536569046 0 77.7144989 1.553251650000007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf488 2.72858886384 0 77.523000675 1.8404989874999913 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf489 3.84266973008 0 77.6484993 1.6522510500000038 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf490 2.97779876864 0 77.46650125 1.9252481250000102 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf491 2.27522664257 0 77.9424997 1.2112504500000014 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf492 2.47665743514 0 78.257500325 0.7387495125000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf493 2.44133377904 0 78.30250085 0.6712487249999981 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf494 2.57467948341 0 77.8404998 1.3642502999999948 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf495 1.80023417279 0 77.540001 1.8149984999999944 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf496 3.58371362677 0 78.022000175 1.0919997374999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf497 1.63231514248 0 78.120003 0.9449955000000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf498 2.25834132162 0 78.007999825 1.1130002625000017 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf499 3.89181483916 0 77.499499375 1.8757509375000012 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf500 3.16500541712 0 77.777499725 1.4587504125000024 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf501 3.36364124845 0 77.960000225 1.1849996624999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf502 2.79400512644 0 77.946000025 1.2059999624999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf503 2.97779876864 0 77.51400035 1.8539994749999948 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf504 1.9769681931 0 77.7790005 1.4564992500000074 -1 gpu 
conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf505 3.53240628779 0 77.670000425 1.6199993625000033 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf506 2.74609889077 0 77.556999225 1.7895011625000024 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf507 2.52227594543 0 78.120000025 0.9449999625000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf508 3.19453962237 0 77.60100015 1.7234997749999934 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf509 3.89181483916 0 77.56599925 1.7760011249999934 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf510 2.48881101405 0 78.157000425 0.8894993624999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf511 3.42248933877 0 78.10499935 0.9675009750000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf512 2.24980746522 0 77.93100035 1.2284994749999925 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf513 3.06580144126 0 77.490999725 1.888500412500008 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf514 1.76439678846 0 77.9490002 1.2014996999999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf515 1.93165087062 0 
77.844499 1.3582515000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf516 3.27671427503 0 78.0164993 1.10025104999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf517 3.63363848948 0 77.651499525 1.6477507124999917 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf518 3.59736474838 0 77.765499275 1.4767510875000056 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf519 2.67110637001 0 77.492998925 1.8855016125000077 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf520 3.03690484441 0 78.17399975 0.8640003750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf521 3.82389130321 0 77.6294996 1.680750599999996 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf522 3.02516396272 0 77.7979996 1.4280006000000043 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf523 2.82425847867 0 77.53849985 1.8172502250000093 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf524 1.92906574394 0 77.805499025 1.4167514624999953 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf525 3.30478754695 0 77.53349955 1.824750674999997 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf526 2.09572432924 0 77.886498975 1.2952515375000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 
promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf527 4.77617552809 0 78.134999625 0.9225005624999909 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf528 4.60279195815 0 78.176000775 0.8609988374999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf529 3.1705014109 0 76.8984998 2.7772503000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf530 3.28260543861 0 78.1204998 0.9442502999999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf531 2.92157268012 0 77.9825014 1.1512478999999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf532 3.50662601431 0 78.0274999 1.0837501500000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf533 4.44155567719 0 78.16200105 0.881998424999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf534 3.82870249017 0 77.802999825 1.4205002624999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf535 3.31800009086 0 77.221000125 2.293499812499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf536 2.16634367244 0 78.24850025 0.7522496249999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf537 5.01218173084 0 78.046000675 1.055998987499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf538 
3.29564085979 0 78.073500675 1.014748987499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf539 2.14664674472 0 78.373999475 0.5640007874999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf540 2.19763383051 0 77.306 2.166000000000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf541 2.66564670957 0 78.26750015 0.7237497749999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf542 2.63995156218 0 78.20250095 0.8212485749999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf543 3.27045362948 0 77.920999675 1.2435004874999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf544 3.6583930271 0 77.994000975 1.1339985374999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf545 2.79400512644 0 77.5879999 1.7430001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf546 3.04399322006 0 77.280999825 2.2035002625000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf547 2.83419571826 0 77.902499575 1.2712506375000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf548 3.50662601431 0 77.949499775 1.20075033749999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf549 2.57952953365 0 78.111000225 0.9584996625000031 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf550 4.13446115525 0 78.0035005 1.1197492499999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf551 2.42726970587 0 77.76500105 1.4774984250000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf552 3.99339287342 0 78.29699995 0.679500075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf553 3.37358534144 0 77.605999575 1.7160006375000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf554 3.27045362948 0 77.372499225 2.066251162500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf555 2.05715694099 0 77.576500125 1.7602498125000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf556 3.96334183535 0 77.767999875 1.4730001874999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf557 1.74018108892 0 78.051500175 1.0477497374999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf558 2.98396322778 0 77.902999825 1.2705002625000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf559 5.09612029865 0 77.92550035 1.2367494750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf560 4.21982434853 0 78.17400035 0.863999475 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise 
swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf561 3.20613284165 0 78.136000875 0.9209986875000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf562 4.66234332773 0 78.073500575 1.0147491375000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf563 4.82158183139 0 78.126500525 0.935249212500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf564 2.6038089013 0 77.893000175 1.285499737500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf565 2.53347556111 0 77.075499525 2.511750712500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf566 3.08305776402 0 78.165998625 0.8760020624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf567 2.32921384149 0 77.71800025 1.5479996249999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf568 2.16019226029 0 77.293500325 2.184749512500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf569 4.69591915468 0 78.1205 0.9442499999999896 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf570 3.92134841516 0 78.143499975 0.9097500375000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf571 2.0941218638 0 78.173500425 0.8647493624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf572 4.87565925629 0 78.01400025 1.1039996250000073 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf573 4.14008616617 0 78.264500775 0.7282488374999971 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf574 2.37280288508 0 78.309000975 0.6614985374999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf575 4.89920842557 0 77.99249985 1.1362502249999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf576 3.6350853616 0 77.5270003 1.8344995500000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf577 4.43580061952 0 78.04050025 1.0642496250000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf578 2.06631426065 0 78.141999575 0.9120006375000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf579 3.96334183535 0 77.7960007 1.43099895000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf580 4.05728597619 0 78.16450025 0.8782496249999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf581 2.31895830908 0 77.97450015 1.1632497750000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf582 4.36510071125 0 78.1205004 0.9442494000000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf583 2.13793288512 0 77.946498825 1.2052517624999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 
------ -+++++ -conf584 3.58371362677 0 77.994501075 1.1332483874999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf585 2.7974298081 0 77.668499975 1.6222500374999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf586 2.86533054339 0 78.151999525 0.8970007125000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf587 2.26300649679 0 77.1840002 2.3489997000000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf588 3.74288911287 0 78.276000225 0.7109996624999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf589 2.27126418908 0 77.5375003 1.8187495499999926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf590 3.36364124845 0 78.01550005 1.101749925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf591 3.76708619178 0 77.873500225 1.3147496624999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf592 3.52603923588 0 78.25900115 0.7364982749999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf593 1.63231514248 0 78.080002 1.0049970000000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf594 2.11514698074 0 78.396999575 0.5295006375000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf595 2.1510303661 0 78.34750035 0.6037494749999937 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf596 1.88123041257 0 78.4820001 0.6679999000000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf597 2.12995187868 0 78.2914997 0.6877504499999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf598 2.31935108241 0 78.26050055 0.7342491749999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf599 2.20258261036 0 78.264000675 0.7289989874999918 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf600 1.60552156231 0 78.020004 1.0949939999999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf601 2.3901980036 0 78.20850115 0.8122482749999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf602 2.50154135982 0 78.21950075 0.7957488750000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf603 1.64456746106 0 78.020004 1.0949939999999998 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf604 2.29217450437 0 78.196500225 0.8302496625000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf605 1.92405135546 0 78.347999775 0.6030003374999922 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf606 2.87133870651 0 77.5960001 1.7309998500000034 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf607 2.64377420873 0 78.217499125 0.798751312499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf608 2.66590618726 0 77.57399955 1.7640006750000055 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf609 2.98396322778 0 77.766999775 1.4745003374999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf610 2.34818605648 0 77.59050045 1.73924932500001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf611 3.72963656481 0 77.524999575 1.837500637500007 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf612 2.82425847867 0 77.514999775 1.8525003374999898 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf613 3.48520639466 0 78.08999955 0.9900006749999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf614 2.5234379499 0 77.820500025 1.3942499624999982 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf615 2.16275109817 0 77.4805004 1.9042494000000048 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf616 2.64786391927 0 77.5724995 1.7662507499999904 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf617 3.44574192026 0 78.094999875 0.9825001875000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf618 3.18710451669 0 77.59249975 1.7362503749999973 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf619 2.6708458791 0 78.0805004 1.004249399999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf620 2.68156783551 0 77.567500475 1.7737492874999958 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf621 2.23420690358 0 78.265499825 0.7267502624999906 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf622 3.59736474838 0 77.67899935 1.606500975000003 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf623 2.44242238448 0 77.613500425 1.704749362500003 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf624 3.16135199481 0 77.686498925 1.5952516125000074 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf625 2.6038089013 0 78.0219999 1.092000150000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf626 2.07523892514 0 77.44449965 1.958250525000004 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf627 2.99538587737 0 77.8749992 1.3125011999999927 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 
promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf628 2.76271987895 0 77.775000225 1.4624996625000009 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf629 3.36322818328 0 77.66750085 1.6237487250000058 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf630 2.21882120614 0 78.028000175 1.0829997374999962 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf631 2.98884796399 0 77.430999175 1.978501237500005 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf632 2.98884796399 0 77.5090001 1.8614998500000084 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf633 2.24980746522 0 77.74700045 1.5044993249999976 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf634 2.35829610893 0 77.76049915 1.4842512749999983 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf635 2.82425847867 0 77.51750035 1.848749474999991 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf636 3.58371362677 0 77.9695004 1.1707493999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf637 2.9094681628 0 77.549500075 1.8007498875000039 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf638 2.12995187868 0 78.2985002 0.6772496999999902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf639 3.16500541712 0 77.782499525 1.4512507124999914 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf640 1.60552156231 0 77.68 1.6049999999999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf641 2.70433982991 0 77.605499025 1.7167514624999995 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf642 2.56840161772 0 77.600499875 1.7242501875000045 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf643 2.37362548074 0 77.542498575 1.8112521375000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf644 2.74609889077 0 77.627999375 1.6830009374999975 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf645 3.24180661389 0 78.065500075 1.0267498874999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf646 3.77591555364 0 77.65950015 1.6357497750000007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf647 3.542000097 0 78.046499275 1.0552510874999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf648 3.59736474838 0 77.720499775 1.54425033750001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf649 2.9094681628 0 77.516499875 1.8502501875000092 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu 
mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf650 2.52903050801 0 77.679000875 1.6064986875000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf651 3.35910310464 0 77.7865001 1.4452498500000033 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf652 2.29428673152 0 77.814499675 1.40325048750001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf653 3.4918718623 0 77.722500225 1.5412496624999932 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf654 3.49811600913 0 77.728000325 1.5329995125000053 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf655 3.72963656481 0 77.623000775 1.6904988375000087 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf656 2.16634367244 0 78.264499825 0.7282502624999978 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf657 3.53240628779 0 77.675500825 1.6117487624999995 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf658 3.18710451669 0 77.778000425 1.4579993624999972 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf659 2.83830787766 0 77.499500275 1.875749587499996 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf660 2.5234379499 0 77.6419991 1.66200134999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf661 3.66721064524 0 77.55650095 1.7902485749999997 -1 gpu conv samp 31 
add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf662 3.4918718623 0 77.55699955 1.7895006749999993 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf663 4.71776016882 0 78.075000175 1.0124997375000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf664 4.32443501431 0 78.085500375 0.9967494375000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf665 2.74472283119 0 77.926499925 1.2352501124999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf666 3.80371000503 0 78.209000525 0.81149921250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf667 3.50662601431 0 77.9505009 1.1992486500000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf668 4.44155567719 0 78.136000275 0.920999587499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf669 4.28586099646 0 78.069500375 1.020749437499994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf670 2.76355621145 0 76.918499275 2.747251087499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf671 3.96334183535 0 77.812000025 1.4069999624999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf672 2.07823096988 0 77.849499275 1.3507510875000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu 
softmax fp16 1 ------ -+++++ -conf673 2.2071979046 0 77.255500425 2.241749362500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf674 3.91630172491 0 78.20250025 0.8212496250000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf675 2.37877965366 0 77.886000625 1.2959990625000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf676 3.48520639466 0 77.965499875 1.1767501874999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf677 3.39365096575 0 78.02050075 1.0942488750000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf678 2.7697048227 0 77.036 2.570999999999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf679 2.14799364712 0 77.940999775 1.2135003375000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf680 3.1705014109 0 76.910999125 2.7585013124999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf681 3.06923723471 0 77.464000675 1.9289989875000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf682 2.35323022394 0 77.083500875 2.499748687500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf683 2.5476157254 0 77.880000375 1.3049994375000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 
-7 gpu softmax fp16 1 ------ -+++++ -conf684 2.23238575577 0 77.9034998 1.2697502999999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf685 4.89920842557 0 77.981501175 1.152748237499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf686 3.18821757541 0 77.998500525 1.1272492125000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf687 4.82158183139 0 78.083500475 0.9997492875000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf688 1.70550344452 0 77.68 1.6049999999999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf689 3.6515641406 0 77.9414998 1.2127502999999962 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf690 1.99715145162 0 78.178000275 0.8579995874999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf691 3.12527637116 0 77.0050001 2.6174998499999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf692 3.74288911287 0 78.258 0.7380000000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf693 2.13793288512 0 78.1519997 0.8970004499999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf694 4.10841193347 0 78.2274998 0.7837502999999941 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf695 2.05715694099 0 77.563999325 
1.7790010125000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf696 3.23567852554 0 77.442000425 1.961999362499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf697 2.38209008774 0 77.400999325 2.023501012499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf698 2.3843713611 0 77.936499725 1.2202504124999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf699 4.21982434853 0 78.240000275 0.7649995875000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf700 4.00390160999 0 78.114000325 0.9539995124999905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf701 2.44133377904 0 76.88649945 2.7952508249999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf702 3.52603923588 0 78.24050005 0.7642499250000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf703 2.0579298361 0 77.67449925 1.613251125000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf704 3.05077488617 0 77.491999575 1.8870006375000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf705 3.23338646124 0 77.2919998 2.187000300000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf706 3.96334183535 0 77.813999325 1.4040010125000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf707 2.8298030352 0 77.9330004 1.225499400000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf708 4.60279195815 0 78.125999675 0.9360004874999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf709 3.22311222914 0 77.320499575 2.1442506375000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf710 4.35330525145 0 78.2800002 0.7049996999999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf711 3.31800009086 0 77.253500775 2.2447488374999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf712 3.1270605866 0 78.171000475 0.8684992875000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf713 2.75882367755 0 77.609999475 1.7100007875000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf714 4.69511411122 0 77.9954996 1.1317505999999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf715 3.53377364881 0 77.552999475 1.7955007875000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf716 2.28357392256 0 77.6785006 1.60724909999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf717 2.24630140357 0 77.262000425 2.231999362500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf718 4.06752993595 
0 78.1885008 0.8422488000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf719 2.35323022394 0 76.881001075 2.8034983874999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf720 2.28262228473 0 77.0555006 2.541749099999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf721 2.94386966745 0 78.093498825 0.9847517625000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf722 2.38769738778 0 77.418500475 1.9972492874999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf723 2.29948783073 0 77.775 1.4624999999999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf724 2.88586161931 0 78.1530001 0.8954998500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf725 4.27315546194 0 78.2205006 0.7942491000000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf726 3.64524581982 0 78.02450025 1.088249624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf727 3.87145936777 0 78.22300045 0.790499324999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf728 1.92027397741 0 77.3279997 2.13300044999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf729 3.41779112394 0 78.0579999 1.038000150000002 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf730 3.6515641406 0 77.90049995 1.2742500750000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf731 1.79680900793 0 78.42599915 0.48600127500000667 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf732 1.76031414454 0 78.606999125 0.5430008749999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf733 1.77170177496 0 78.338000125 0.6179998125000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf734 2.64377420873 0 78.252999875 0.7455001874999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf735 2.07194193579 0 78.442999125 0.4605013124999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf736 2.48881101405 0 78.138501425 0.9172478624999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf737 2.5635932838 0 78.22050075 0.794248875000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf738 2.36440403604 0 78.284500375 0.6982494375000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf739 1.99061917958 0 78.33349955 0.6247506750000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 
1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf740 2.27428194775 0 78.27450025 0.713249624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf741 1.95369833697 0 78.362500225 0.5812496624999923 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf742 1.99206710006 0 78.22650005 0.7852499250000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf743 2.55141313335 0 78.265999925 0.726000112499996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf744 2.24152130544 0 78.300000325 0.6749995125000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf745 1.88123041257 0 78.385500125 0.5467498125000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf746 2.02237001209 0 78.501499625 0.6485003750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf747 1.92405135546 0 78.301500625 0.6727490624999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf748 2.24152130544 0 78.350499675 0.599250487500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf749 2.24152130544 0 78.238999175 0.7665012374999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf750 2.67240958712 0 78.2230003 0.7904995500000069 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf751 2.24152130544 0 78.163999475 0.8790007875000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf752 1.88771378107 0 78.321499625 0.6427505624999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf753 2.44133377904 0 78.307999575 0.663000637500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf754 1.76439678846 0 78.19899945 0.8265008249999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf755 2.24152130544 0 78.233000325 0.7754995124999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf756 2.92188437688 0 77.6155006 1.7017490999999936 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf757 2.35829610893 0 77.784500175 1.4482497374999923 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf758 2.67110637001 0 77.565000675 1.7774989875000102 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf759 2.54477507927 0 77.81399975 1.4040003750000096 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf760 2.52903050801 0 77.79449925 1.4332511249999982 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 
------ -+++++ -conf761 3.35910310464 0 77.543000525 1.8104992125000052 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf762 2.98396322778 0 77.81699965 1.3995005250000006 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf763 2.27522664257 0 77.9110006 1.2584991000000088 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf764 3.46799468161 0 77.600999725 1.7235004125000088 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf765 2.72858886384 0 77.59399985 1.7340002249999955 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf766 3.59736474838 0 77.69649975 1.5802503749999985 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf767 1.81876478645 0 77.32 2.1450000000000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf768 3.22083792723 0 77.53899915 1.8165012750000074 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf769 3.45051734728 0 77.7440003 1.5089995500000057 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf770 2.86683022571 0 77.706999775 1.5645003375000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf771 2.48136969546 0 77.583999875 1.7490001874999948 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf772 3.02516396272 0 
77.7714992 1.4677512000000092 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf773 3.24411063565 0 78.043999525 1.0590007124999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf774 2.98396322778 0 77.6845 1.5982500000000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf775 2.54477507927 0 77.590999775 1.7385003374999997 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf776 2.46129952706 0 77.6789995 1.606500749999995 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf777 3.01849555693 0 77.45599925 1.9410011249999926 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf778 3.89181483916 0 77.65 1.6499999999999915 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf779 2.98884796399 0 77.515500675 1.8517489875000024 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf780 3.89181483916 0 77.39899925 2.026501124999996 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf781 3.49454519355 0 77.64049945 1.6642508250000105 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf782 3.27671427503 0 77.992000675 1.1369989875000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf783 2.98884796399 0 77.547499875 1.8037501875000004 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 
5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf784 2.03862830664 0 78.50150015 0.6484998500000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf785 2.06413396769 0 77.8630003 1.330499550000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf786 3.24603315703 0 77.6249994 1.6875009000000105 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf787 2.87133870651 0 78.0169998 1.0995003000000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf788 2.07523892514 0 77.405499875 2.0167501874999942 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf789 2.95307508795 0 77.4635008 1.9297487999999916 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf790 2.72858886384 0 77.4759993 1.911001050000003 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf791 2.89409550228 0 77.545499775 1.8067503375000058 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf792 3.82389130321 0 77.5959998 1.731000299999998 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf793 3.39701846598 0 77.605000525 1.7174992125000088 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf794 3.25529306887 0 77.57349925 1.7647511250000036 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf795 3.63363848948 0 77.6295009 1.6807486500000053 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf796 3.4918718623 0 77.5595004 1.7857493999999932 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf797 3.11638574781 0 77.565498625 1.7767520624999946 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf798 3.19453962237 0 77.5310004 1.8284994000000054 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf799 2.83566297806 0 78.08499915 0.9975012749999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf800 3.52603923588 0 77.94200035 1.211999474999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf801 3.48520639466 0 77.989000675 1.1414989875000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf802 3.20613284165 0 78.133000875 0.92549868750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf803 2.85338933385 0 77.49199965 1.8870005250000048 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf804 3.30478754695 0 77.596500125 1.7302498124999914 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf805 1.92906574394 0 77.572499625 1.766250562499991 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 
promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf806 1.8683963672 0 78.3395001 0.6157498500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf807 3.58371362677 0 78.007500225 1.1137496624999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf808 2.6336049908 0 77.66750125 1.6237481249999988 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf809 3.77591555364 0 77.58999985 1.7400002250000028 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf810 3.07993730408 0 77.98450015 1.1482497749999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf811 1.93165087062 0 77.84100025 1.36349962500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf812 2.64914454991 0 78.053000125 1.0454998125000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf813 3.01849555693 0 77.412500075 2.0062498875000045 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf814 1.9769681931 0 77.975500125 1.1617498125000054 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf815 3.42248933877 0 78.078999 1.006501500000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf816 2.63411159335 0 77.61799985 1.6980002249999941 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 
promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf817 3.36364124845 0 78.058499125 1.037251312500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf818 2.43612195203 0 78.1924998 0.8362503000000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf819 3.71194409149 0 77.449999775 1.9500003375000077 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf820 3.28812300212 0 77.5774994 1.7587509000000097 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf821 4.13446115525 0 77.995500225 1.131749662499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf822 4.43580061952 0 78.0010006 1.1234991000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf823 4.49698282055 0 78.079500425 1.0057493624999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf824 3.27045362948 0 77.385499225 2.0467511624999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf825 2.41549528692 0 77.288999575 2.191500637499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf826 3.91238549312 0 77.974000825 1.1639987624999932 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf827 3.23338646124 0 77.27799945 2.2080008250000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf828 2.92157268012 0 77.929500675 1.2307489875000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv 
samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf829 3.04365492811 0 77.0490005 2.551499249999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf830 3.6350853616 0 77.55699955 1.7895006749999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf831 3.14321067355 0 77.30549915 2.1667512749999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf832 2.81352330076 0 78.131999375 0.9270009374999901 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf833 2.51025423632 0 77.7514998 1.4977502999999928 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf834 1.98484848485 0 76.900002 2.774996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf835 5.01218173084 0 78.1185006 0.9472490999999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf836 2.81757564846 0 77.3214996 2.1427506000000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf837 3.31800009086 0 77.314999975 2.1525000374999905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf838 3.50662601431 0 78.001000625 1.1234990624999952 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf839 2.37444864695 0 77.68899985 1.5915002249999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 
add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf840 4.21982434853 0 78.1680008 0.8729987999999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf841 2.53629102275 0 77.1980002 2.3279997000000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf842 1.67627481734 0 77.219994 2.2950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf843 4.60279195815 0 78.109000275 0.9614995875000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf844 3.53377364881 0 77.57350025 1.7647496250000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf845 3.91798251104 0 78.2325003 0.7762495500000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf846 4.36510071125 0 78.20000035 0.8249994750000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf847 3.1270605866 0 78.18399965 0.8490005249999939 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf848 2.58610616022 0 78.1775008 0.8587487999999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf849 2.07823096988 0 77.853500125 1.3447498125000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf850 3.96334183535 0 77.8455002 1.3567496999999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf851 
4.44155567719 0 78.2215 0.792749999999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf852 2.22422719659 0 77.0174992 2.5987511999999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf853 3.1705014109 0 76.985000175 2.647499737500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf854 3.69591437391 0 77.835499375 1.3717509375000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf855 3.1705014109 0 76.8904994 2.789250900000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf856 5.09612029865 0 77.963500725 1.1797489124999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf857 4.87565925629 0 78.03849965 1.0672505249999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf858 3.6515641406 0 77.90149955 1.2727506750000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf859 4.89920842557 0 78.0020002 1.1219997000000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf860 1.91009546227 0 76.919998 2.74500299999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf861 4.66234332773 0 77.9634993 1.1797510500000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf862 3.58371362677 0 78.027999725 1.0830004124999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf863 2.98396322778 0 77.876999875 1.3095001875000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf864 3.92134841516 0 78.1380001 0.9179998500000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf865 2.18431284582 0 78.071999925 1.0170001124999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf866 4.69511411122 0 78.008 1.1130000000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf867 2.76355621145 0 76.909499575 2.760750637500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf868 3.27045362948 0 77.92649975 1.2352503749999926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf869 4.98028095379 0 78.05599975 1.041000375000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf870 3.96334183535 0 77.7624998 1.4812502999999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf871 3.80371000503 0 78.204000075 0.8189998875000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf872 2.76355621145 0 77.818000175 1.3979997375000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf873 2.18431284582 0 77.952000375 1.1969994375000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf874 3.6583930271 0 77.95250055 1.1962491750000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf875 2.35323022394 0 76.93950005 2.71574992499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf876 3.35622156523 0 77.8575001 1.3387498500000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf877 2.31895830908 0 77.948000125 1.2029998125000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf878 4.00390160999 0 78.0455004 1.0567494000000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf879 3.05997821259 0 77.5605001 1.784249850000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf880 4.77617552809 0 78.16100065 0.8834990249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf881 3.18821757541 0 77.956999575 1.1895006375000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf882 2.33517186251 0 77.0050003 2.617499549999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf883 2.45556178201 0 77.736999325 1.519501012500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf884 3.44357563851 0 77.866499875 1.3252501874999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 
pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf885 3.40081490105 0 77.980000275 1.1549995875000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf886 4.28586099646 0 78.068000025 1.0229999624999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf887 2.6666849235 0 77.8174999 1.398750149999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf888 2.40933283482 0 78.249000425 0.7514993624999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf889 2.12995187868 0 78.315999275 0.6510010875000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf890 1.99061917958 0 78.5549996 0.5950003999999979 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf891 2.49016877877 0 78.22750045 0.7837493250000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf892 2.36440403604 0 78.225500475 0.786749287499994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf893 2.0051937949 0 78.266501025 0.7252484625000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf894 2.23420690358 0 78.3095012 0.6607482000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf895 1.99206710006 0 78.292000425 0.6869993625000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 
gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf896 1.88771378107 0 78.3400007 0.6149989499999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf897 2.43612195203 0 78.225499325 0.7867510124999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf898 1.540499209 0 78.159996 0.88500599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf899 2.10636242153 0 78.417000075 0.49949988749999363 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf900 2.16360539179 0 78.338500575 0.6172491375000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf901 1.94813942115 0 78.329001025 0.6314984625000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf902 2.16634367244 0 78.270499625 0.719250562500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf903 3.70240971999 0 77.72750015 1.533749775000004 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf904 2.46129952706 0 77.588999775 1.7415003374999927 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf905 2.67110637001 0 77.60749935 1.7137509750000035 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf906 3.49454519355 0 77.474000025 1.9139999625000073 -1 gpu conv 
samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf907 3.16720152652 0 77.601500275 1.7227495874999903 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf908 2.57467948341 0 77.621499675 1.6927504875000068 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf909 2.25834132162 0 77.974500125 1.1632498124999913 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf910 3.63363848948 0 77.506999425 1.8645008624999946 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf911 3.84266973008 0 77.47100045 1.918499324999992 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf912 3.89181483916 0 77.640499875 1.664250187499995 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf913 2.1510303661 0 78.366999625 0.5745005624999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf914 3.59736474838 0 77.5859996 1.7460006000000092 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf915 1.85888516523 0 77.279999 2.2050014999999945 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf916 3.25529306887 0 77.648500275 1.6522495874999947 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf917 2.70433982991 0 77.718998725 1.5465019124999912 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf918 2.80373024506 0 78.0499997 1.0500004499999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf919 3.45051734728 0 77.5799984 1.7550024000000093 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf920 2.98884796399 0 77.48249965 1.901250525000009 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf921 1.92906574394 0 77.702499575 1.5712506374999933 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf922 3.18710451669 0 77.611000225 1.708499662500003 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf923 3.1306351361 0 78.080499325 1.0042510124999922 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf924 2.20630884648 0 77.962500925 1.1812486124999992 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf925 3.84266973008 0 77.69300005 1.5854999250000077 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf926 2.24980746522 0 77.5304995 1.8292507499999928 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf927 2.84007388684 0 77.789499625 1.4407505624999928 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf928 1.85888516523 0 77.300003 2.1749954999999943 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf929 3.58371362677 0 77.96999965 1.1700005249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf930 3.4918718623 0 77.5699999 1.7700001500000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf931 1.96561700982 0 77.9269995 1.2345007500000094 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf932 3.11638574781 0 77.448000375 1.9529994374999902 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf933 2.27522664257 0 77.884999125 1.2975013125000103 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf934 2.48136969546 0 77.53750075 1.81874887499999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf935 1.81635233161 0 78.004500925 1.1182486124999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf936 2.45556178201 0 77.64299995 1.6605000749999945 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf937 3.59076159103 0 77.530500475 1.829249287500005 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf938 3.02516396272 0 77.670999775 1.6185003375000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf939 3.542000097 0 78.062499825 1.031250262499995 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf940 1.89424199196 0 78.40299995 0.5205000750000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf941 1.82360897249 0 77.93999975 1.2150003750000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf942 2.34818605648 0 77.39100015 2.0384997750000053 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf943 3.45487014523 0 77.540999925 1.8135001125000088 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf944 2.11694541871 0 78.096500175 0.9802497374999959 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf945 2.23238575577 0 77.707000075 1.5644998875000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf946 2.20915635682 0 78.0034997 1.1197504499999908 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf947 3.13457659127 0 77.79799975 1.4280003749999963 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf948 3.26928242088 0 77.75549965 1.4917505249999934 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf949 3.48564996739 0 78.11399995 0.9540000750000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf950 4.98028095379 0 78.00299975 1.1205003749999989 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf951 2.44133377904 0 76.8854997 2.7967504500000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf952 4.59661176964 0 78.033999425 1.074000862499993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf953 2.38209008774 0 77.401500575 2.0227491375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf954 2.94386966745 0 78.122500675 0.9412489875000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf955 2.8077544116 0 77.952499975 1.1962500375000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf956 4.89920842557 0 78.042499675 1.061250487499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf957 2.53629102275 0 76.9699994 2.670000899999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf958 4.79205397957 0 77.998000425 1.1279993624999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf959 2.41762758621 0 78.1275004 0.9337493999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf960 3.13565325662 0 78.0470011 1.0544983499999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf961 3.27045362948 0 77.32399985 2.1390002249999895 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise 
swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf962 3.4869813633 0 78.087499975 0.9937500374999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf963 1.88381840849 0 78.0530005 1.045499250000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf964 4.48813937768 0 78.20750085 0.8137487249999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf965 3.27045362948 0 77.946500575 1.2052491374999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf966 3.06923723471 0 77.462500025 1.9312499625000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf967 2.88343094032 0 77.0365002 2.570249699999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf968 3.05009536007 0 78.048499675 1.0522504874999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf969 2.55260214603 0 77.1134998 2.4547503000000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf970 2.84775205688 0 77.982000625 1.1519990625000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf971 2.68682935802 0 78.218001525 0.797997712499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf972 3.1705014109 0 76.964999425 2.677500862499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 
pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf973 2.73868451644 0 76.981499525 2.6527507124999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf974 4.60279195815 0 78.200501275 0.8242480875000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf975 2.6666849235 0 77.78500065 1.447499024999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf976 3.76294547016 0 77.81899995 1.3965000749999916 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf977 4.69591915468 0 78.12349835 0.9397524749999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf978 4.90008498647 0 78.1084997 0.9622504500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf979 4.00390160999 0 77.994000575 1.1339991374999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf980 3.58371362677 0 78.021500225 1.0927496625000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf981 2.97909443198 0 78.1365005 0.9202492500000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf982 2.51025423632 0 77.759499975 1.4857500375000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf983 2.88343094032 0 77.00999965 2.610000525000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf984 3.78635617385 0 78.281000475 0.7034992875000015 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf985 3.91630172491 0 78.2364997 0.770250450000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf986 2.19147904018 0 77.157999825 2.388000262499993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf987 4.50808625337 0 78.13050045 0.9292493250000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf988 2.02237001209 0 78.5584997 0.5915003000000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf989 3.41779112394 0 77.9699995 1.1700007499999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf990 4.13446115525 0 77.980500875 1.1542486874999938 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf991 4.87565925629 0 78.00549905 1.116751425000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf992 2.59985404578 0 77.857500025 1.3387499625000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf993 3.89292123452 0 77.8730001 1.315499850000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf994 2.09813263107 0 77.88900045 1.2914993250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf995 3.58559037722 0 77.913500175 1.2547497375000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 
1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf996 2.0941218638 0 77.07050035 2.519249474999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf997 3.6515641406 0 77.900000975 1.2749985375000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf998 2.55165084726 0 78.32750035 0.633749475000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf999 2.3341767432 0 78.415499575 0.5017506375000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1000 3.52603923588 0 77.950500225 1.1992496625000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1001 3.89292123452 0 77.8040005 1.4189992499999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1002 4.21982434853 0 78.181999975 0.8520000375000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1003 2.27220637801 0 77.9985003 1.1272495499999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1004 4.12325686433 0 78.208999675 0.8115004874999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1005 3.24718776399 0 78.17100015 0.8684997750000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1006 4.36510071125 0 78.194000825 0.8339987624999949 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1007 2.23202188242 0 77.88950165 1.2907475250000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1008 3.6350853616 0 77.5305006 1.8292491000000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1009 3.1705014109 0 76.911999825 2.757000262500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1010 2.63995156218 0 78.09099915 0.9885012749999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1011 2.26900612048 0 77.43999935 1.9650009750000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1012 2.95817862688 0 78.152999375 0.8955009375000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1013 3.76294547016 0 77.7829996 1.4505006000000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1014 3.1306351361 0 78.0964996 0.980250599999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1015 3.17822794726 0 78.00099945 1.1235008250000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1016 2.32921384149 0 78.343000275 0.6104995875000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1017 4.27983318032 0 78.19599975 0.8310003750000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1018 3.20200949945 0 78.057999375 1.0380009375000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise 
swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1019 5.09612029865 0 77.9790006 1.1564990999999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1020 3.21441146697 0 78.006501025 1.1152484624999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1021 2.83419571826 0 77.893000175 1.285499737500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1022 2.92157268012 0 77.982500775 1.1512488374999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1023 3.70341103092 0 77.94600085 1.2059987249999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1024 4.43580061952 0 78.02299955 1.0905006750000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1025 3.21441146697 0 77.967000425 1.1744993625000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1026 3.43063493306 0 78.3300017 0.6299974500000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1027 3.05077488617 0 78.101000325 0.9734995124999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1028 2.6974145891 0 77.746499625 1.5052505625000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1029 3.75675142744 0 78.24849945 0.7522508250000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1030 3.13565325662 0 77.45699975 
1.9395003750000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1031 4.76952130091 0 77.99300005 1.1354999249999906 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1032 1.63231514248 0 78.219994 0.7950090000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1033 2.16360539179 0 78.309999475 0.6600007875000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1034 2.14664674472 0 78.349999775 0.6000003374999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1035 2.03862830664 0 78.498499375 0.6515006250000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1036 1.92297058901 0 78.362000525 0.5819992125000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1037 2.14833063686 0 78.43400025 0.4739996250000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1038 1.88123041257 0 78.516999425 0.6330005750000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1039 1.94813942115 0 78.2904999 0.6892501499999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1040 2.48926343774 0 78.2200001 0.7949998500000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1041 2.18361621336 0 78.249499325 0.7507510124999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1042 2.37280288508 0 78.307500125 0.6637498124999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1043 1.89424199196 0 78.383499925 0.549750112500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1044 2.59394423726 0 78.23650015 0.7702497750000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1045 2.29217450437 0 78.15900005 0.8864999249999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1046 1.88123041257 0 78.355000775 0.5924988375000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1047 2.23420690358 0 78.266000375 0.7259994374999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1048 2.20915635682 0 78.045500025 1.0567499625000067 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1049 2.57467948341 0 77.5895 1.7407499999999985 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1050 3.72963656481 0 77.702 1.5720000000000027 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1051 2.87133870651 0 77.7084998 1.5622503000000023 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 
pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1052 3.36322818328 0 77.545000475 1.8074992875000078 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1053 2.27522664257 0 77.99050005 1.1392499250000085 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1054 3.45051734728 0 77.57399995 1.7640000749999984 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1055 2.00445999726 0 77.695999425 1.5810008625000052 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1056 3.16135199481 0 77.4580006 1.9379990999999919 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1057 2.72858886384 0 77.643 1.660499999999999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1058 2.43050085973 0 78.240499875 0.7642501875000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1059 2.97779876864 0 77.5065006 1.8652491000000069 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1060 2.44242238448 0 77.486000225 1.895999662500003 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1061 2.95498685009 0 77.769999475 1.4700007874999912 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1062 2.99538587737 0 77.6839996 
1.5990005999999894 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1063 2.70007394171 0 78.2305 0.7792499999999905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1064 3.11638574781 0 77.429000275 1.9814995874999894 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1065 2.73376380311 0 77.986499175 1.1452512374999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1066 2.24980746522 0 77.919499525 1.2457507124999907 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1067 2.54477507927 0 77.650499175 1.6492512375000103 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1068 3.59736474838 0 77.56399995 1.7790000750000061 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1069 1.68919856598 0 77.599998 1.725003000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1070 2.27220637801 0 77.841999425 1.362000862500004 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1071 1.96561700982 0 77.838000275 1.3679995875000017 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1072 2.65840178604 0 77.9914997 1.1377504499999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 
tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1073 2.67110637001 0 77.647498625 1.6537520625000042 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1074 3.77591555364 0 77.45899885 1.9365017249999994 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1075 2.12995187868 0 78.3009999 0.6735001500000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1076 3.66721064524 0 77.549000725 1.8014989125000014 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1077 3.11638574781 0 77.498499925 1.8772501124999934 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1078 3.16135199481 0 77.5295002 1.8307496999999984 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1079 2.5234379499 0 77.82349965 1.3897505249999966 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1080 2.71748837209 0 77.85500125 1.3424981249999988 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1081 3.4918718623 0 77.52800005 1.8329999249999958 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1082 3.84266973008 0 77.68799935 1.5930009750000025 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1083 2.89409550228 0 77.606999775 1.7145003374999916 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add 
fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1084 2.60281905984 0 77.470499775 1.91925033750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1085 2.76271987895 0 77.39050045 2.039249324999993 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1086 2.63512538337 0 77.775500225 1.4617496624999973 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1087 3.44314270897 0 77.59700025 1.7294996250000096 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1088 3.59076159103 0 77.57299945 1.7655008250000037 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1089 3.59736474838 0 77.616498975 1.7002515375000016 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1090 3.84266973008 0 77.458501075 1.9372483874999986 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1091 2.48136969546 0 77.501500725 1.8727489125000005 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1092 3.44574192026 0 78.093999125 0.9840013125000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1093 3.39701846598 0 77.636000025 1.670999962499998 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1094 2.84007388684 0 77.776000025 1.4609999624999972 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1095 2.41443054714 0 77.6015001 1.7227498500000067 -1 gpu conv samp 31 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1096 3.18710451669 0 77.5355001 1.8217498500000104 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1097 2.21882120614 0 78.0529997 1.0455004499999987 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1098 1.62843286633 0 78.160004 0.884993999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1099 2.21882120614 0 78.022499125 1.091251312500006 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1100 3.24180661389 0 78.083999625 0.9990005624999938 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1101 2.72018747284 0 77.89399965 1.2840005250000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1102 2.99538587737 0 77.865000125 1.3274998125000081 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1103 3.72963656481 0 77.505000175 1.8674997374999904 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1104 2.64786391927 0 77.71150035 1.5577494750000085 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1105 2.57467948341 0 77.849499925 1.3507501124999948 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1106 3.87145936777 0 78.20300085 0.8204987250000073 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1107 2.65840178604 0 77.654498675 1.6432519875000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1108 3.70341103092 0 77.9179999 1.2480001500000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1109 2.58903988183 0 78.103500525 0.9697492124999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1110 2.95307508795 0 77.5050006 1.8674990999999963 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1111 2.59985404578 0 77.8775005 1.3087492500000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1112 3.11142905832 0 78.110500675 0.9592489875000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1113 2.6262810908 0 77.9474991 1.2037513499999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1114 3.57575930671 0 77.5924996 1.7362506000000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1115 3.53696827831 0 77.8979997 1.2780004500000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1116 2.76355621145 0 77.766500125 1.4752498125000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1117 3.32808014218 0 77.96799985 1.1730002250000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise 
swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1118 1.8517211653 0 77.825500325 1.3867495125000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1119 3.05997821259 0 77.4799999 1.9050001500000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1120 2.69608687627 0 77.7369999 1.519500149999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1121 2.73868451644 0 76.991999625 2.637000562499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1122 3.15516049596 0 78.0829996 1.0005006000000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1123 4.66234332773 0 78.014000425 1.1039993624999909 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1124 4.49698282055 0 78.037999725 1.0680004124999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1125 3.26850208106 0 77.27399955 2.214000675000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1126 2.63995156218 0 78.168500525 0.8722492125000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1127 2.76216260511 0 77.72600055 1.5359991750000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1128 4.2459932947 0 78.1615003 0.8827495499999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 
gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1129 4.71776016882 0 78.141999875 0.9120001875000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1130 2.96779546224 0 77.037499625 2.568750562500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1131 2.13793288512 0 78.1220006 0.9419990999999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1132 2.33816231791 0 77.426000075 1.9859998874999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1133 4.13446115525 0 78.046999775 1.054500337499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1134 2.37877965366 0 77.9255001 1.2367498500000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1135 4.90008498647 0 78.032500275 1.0762495874999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1136 3.542000097 0 78.06050045 1.0342493249999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1137 3.29564085979 0 78.013500225 1.104749662499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1138 2.75993575613 0 78.140999775 0.9135003375000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1139 2.07194193579 0 78.491999825 0.6580001750000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1140 4.67427943592 0 
78.136500725 0.9202489124999929 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1141 4.39310849558 0 78.14650025 0.9052496249999962 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1142 2.6666849235 0 77.769999725 1.4700004124999921 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1143 3.12848942641 0 77.2724995 2.2162507500000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1144 3.6515641406 0 77.9045 1.2682500000000019 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1145 1.91946646809 0 78.0205012 1.0942482000000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1146 2.47800196795 0 77.707 1.5645000000000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1147 3.13565325662 0 78.0715006 1.0177491000000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1148 2.27220637801 0 77.976999875 1.1595001874999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1149 4.50808625337 0 78.2275001 0.7837498499999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1150 3.3268673034 0 78.090999825 0.9885002625000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1151 3.27045362948 0 77.920499375 1.2442509374999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1152 3.41779112394 0 77.97850025 1.1572496250000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1153 3.4088575296 0 77.643500325 1.6597495124999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1154 3.84374835529 0 77.912500475 1.2562492874999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1155 2.44133377904 0 77.68549885 1.5967517249999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1156 4.50808625337 0 78.19299935 0.8355009750000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1157 3.91238549312 0 77.884499575 1.29825063749999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1158 4.27983318032 0 78.21400065 0.803999024999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1159 4.48813937768 0 78.13750035 0.9187494750000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1160 3.23567852554 0 77.40799915 2.0130012750000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1161 3.19976487082 0 78.14350035 0.9097494750000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1162 4.59198750865 0 78.08450075 0.9982488749999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1163 3.05997821259 0 78.078499975 1.0072500375000004 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1164 2.91566305143 0 77.81099945 1.4085008250000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1165 4.37206912378 0 78.126499725 0.9352504124999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1166 2.55141313335 0 78.257000525 0.7394992125000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1167 2.6038089013 0 77.8709997 1.318500450000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1168 3.20989056068 0 78.087999475 0.9930007874999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1169 4.69591915468 0 78.073000175 1.0154997374999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1170 4.07904045576 0 78.0680005 1.0229992500000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1171 3.70341103092 0 77.97799995 1.158000075000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1172 4.04291144613 0 78.1914999 0.8377501500000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1173 2.73868451644 0 77.900999825 1.2735002625000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1174 2.81352330076 0 78.18849925 0.84225112499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 
1 -7 gpu softmax fp16 1 ------ -+++++ -conf1175 2.72831704128 0 77.1470015 2.4044977499999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1176 2.02536121357 0 77.484 1.899000000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1177 2.89378970586 0 78.066000325 1.0259995124999932 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1178 4.47713386599 0 78.0374989 1.0687516499999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1179 2.28186154535 0 78.30250075 0.6712488750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1180 1.59477017142 0 78.099998 0.975003000000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1181 1.99206710006 0 78.364499875 0.5782501874999895 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1182 1.79680900793 0 78.4280002 0.4829997000000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1183 2.10636242153 0 78.437001375 0.4694979375000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1184 2.3343756992 0 78.258500675 0.7372489875000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1185 1.99206710006 0 78.21500055 0.8024991750000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1186 2.48926343774 0 78.25100025 0.7484996249999938 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1187 2.1510303661 0 78.399999275 0.5250010875000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1188 2.29217450437 0 78.2684995 0.7222507499999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1189 2.14833063686 0 78.370000125 0.5699998124999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1190 2.07194193579 0 78.4640009 0.685999099999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1191 3.59076159103 0 77.510000225 1.8599996625000017 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1192 3.72963656481 0 77.538999975 1.8165000375000062 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1193 3.06545828495 0 78.12249985 0.9412502250000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1194 2.25834132162 0 77.95550025 1.191749625 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1195 3.24180661389 0 78.09600075 0.9809988749999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1196 3.39238987381 0 78.1539999 0.8940001499999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1197 3.70240971999 0 77.582999925 1.7505001125000064 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1198 3.4918718623 0 77.696999525 1.579500712500007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1199 3.84266973008 0 77.717499325 1.5487510124999915 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1200 2.63411159335 0 77.5909994 1.7385008999999982 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1201 3.89181483916 0 77.60349975 1.7197503750000038 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1202 2.17046411412 0 78.16049905 0.8842514250000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1203 2.1510303661 0 78.32750015 0.6337497749999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1204 2.87133870651 0 77.3539998 2.0940003000000047 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1205 1.87210017687 0 78.310500475 0.6592492875000033 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1206 2.99538587737 0 77.777999625 1.4580005624999899 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1207 2.91566305143 0 77.823999825 1.389000262499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 
promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1208 2.48136969546 0 77.5454995 1.806750749999992 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1209 2.0051937949 0 78.177000725 0.8594989125000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1210 2.49470538002 0 77.80050115 1.4242482749999965 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1211 3.28812300212 0 77.6694994 1.6207508999999902 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1212 2.64786391927 0 77.750500475 1.4992492875000067 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1213 2.80373024506 0 77.881000475 1.30349928750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1214 2.07194193579 0 78.442500475 0.46124928749999583 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1215 3.59736474838 0 77.547500075 1.8037498874999969 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1216 3.16500541712 0 77.667500625 1.6237490624999964 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1217 2.24980746522 0 77.682499425 1.6012508624999953 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1218 1.74073411989 0 77.279999 2.2050014999999945 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1219 3.4918718623 0 77.587500025 1.7437499625000044 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1220 3.11142905832 0 78.137999725 0.9180004124999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1221 2.03272718838 0 77.937999625 1.218000562499995 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1222 2.95498685009 0 77.814499925 1.4032501124999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1223 3.89181483916 0 77.42200035 1.991999474999993 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1224 2.21882120614 0 78.0514997 1.0477504500000094 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1225 2.11107096401 0 77.77950025 1.4557496250000028 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1226 3.30478754695 0 77.557500175 1.788749737499998 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1227 3.16135199481 0 77.4919998 1.8870002999999969 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1228 3.542000097 0 78.091499675 0.9877504875000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1229 3.66721064524 0 77.467500125 1.9237498124999988 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise 
swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1230 2.04029892443 0 77.632001075 1.6769983874999923 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1231 2.76271987895 0 77.370999175 2.0685012375000085 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1232 3.18710451669 0 77.56999975 1.7700003750000093 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1233 3.72963656481 0 77.694500375 1.583249437499994 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1234 2.98884796399 0 77.464500175 1.9282497375000034 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1235 2.30393703271 0 77.696999775 1.5795003375000078 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1236 2.57467948341 0 77.613499225 1.7047511625000027 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1237 2.84007388684 0 77.7164995 1.5502507500000036 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1238 2.1432868672 0 77.980999425 1.1535008625000103 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1239 3.84266973008 0 77.508499775 1.8622503374999937 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1240 2.24980746522 0 77.574000025 1.7639999624999945 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1241 2.41443054714 0 77.596499225 1.7302511624999966 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1242 2.79400512644 0 77.93699985 1.2195002249999902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1243 3.27045362948 0 77.3099993 2.160001049999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1244 2.45270292962 0 78.304000525 0.6689992124999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1245 2.76355621145 0 76.920999525 2.7435007125000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1246 2.91070503368 0 77.567500075 1.7737498875000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1247 4.60279195815 0 78.169499625 0.8707505624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1248 5.01218173084 0 78.1449999 0.9075001499999971 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1249 4.79205397957 0 78.0169998 1.0995003000000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1250 3.58371362677 0 78.0214997 1.0927504499999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1251 4.27315546194 0 78.194499825 0.8332502625000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1252 3.96334183535 0 
77.71600015 1.550999775000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1253 2.52809669417 0 77.74900045 1.5014993250000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1254 3.23338646124 0 77.338500225 2.1172496624999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1255 3.1705014109 0 76.979499875 2.655750187499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1256 4.60279195815 0 78.067500075 1.0237498875000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1257 3.31800009086 0 77.27149965 2.2177505250000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1258 4.69511411122 0 78.07099935 1.0185009750000091 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1259 4.13446115525 0 78.039001075 1.0664983874999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1260 2.83419571826 0 77.84549965 1.3567505250000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1261 2.97909443198 0 78.13100035 0.9284994750000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1262 5.09612029865 0 78.0065002 1.1152496999999926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1263 2.51025423632 0 77.747500525 1.50374921249999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 
35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1264 4.44155567719 0 78.142999375 0.9105009374999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1265 4.66234332773 0 78.030499625 1.0792505624999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1266 4.21982434853 0 78.242999675 0.7605004875000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1267 2.23202188242 0 77.88099955 1.3035006750000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1268 4.89920842557 0 77.916500125 1.2502498125000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1269 3.93148091658 0 78.236500275 0.7702495875000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1270 4.06752993595 0 78.23250025 0.7762496249999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1271 4.28586099646 0 78.0454995 1.056750749999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1272 3.55210680761 0 77.8340006 1.373999100000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1273 3.53377364881 0 77.581000125 1.7534998124999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1274 2.55165084726 0 78.29200035 0.6869994750000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1275 2.63995156218 0 78.137500525 0.9187492125000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise 
swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1276 3.18821757541 0 77.458999075 1.9365013875000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1277 3.6515641406 0 77.89600045 1.2809993249999962 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1278 3.99339287342 0 78.204501125 0.8182483125000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1279 2.31895830908 0 77.9475006 1.2037491000000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1280 2.61175481887 0 78.01199955 1.1070006750000019 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1281 4.36510071125 0 78.160500325 0.8842495124999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1282 2.18500992291 0 77.8480004 1.3529993999999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1283 4.12325686433 0 78.20700015 0.8144997750000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1284 3.6350853616 0 77.549999975 1.8000000374999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1285 3.40081490105 0 78.0040002 1.1189997000000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1286 3.9639154741 0 78.268001 0.7229985000000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1287 2.66564670957 0 78.252999475 0.7455007875000064 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1288 4.00390160999 0 78.072999525 1.0155007124999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1289 3.01949394303 0 77.361000075 2.0834998874999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1290 2.58610616022 0 78.197999625 0.8280005625000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1291 3.20613284165 0 78.168999925 0.8715001125000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1292 4.77617552809 0 78.1380003 0.9179995499999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1293 3.40081490105 0 78.0779995 1.0080007499999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1294 3.33253479352 0 78.0814993 1.0027510499999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1295 3.24718776399 0 78.15799955 0.8880006750000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1296 4.82158183139 0 78.0450005 1.0574992499999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1297 4.42219194672 0 78.151999825 0.8970002624999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1298 3.70341103092 0 77.894499775 1.2832503375000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1299 
4.98028095379 0 77.936000225 1.2209996624999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1300 2.8077544116 0 77.9090004 1.2614994000000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1301 3.96334183535 0 77.7869993 1.4445010499999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1302 3.18821757541 0 77.96200045 1.1819993249999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1303 2.32921384149 0 78.3434994 0.6097509000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1304 1.8683963672 0 78.375500975 0.5617485375000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1305 2.77391276825 0 78.1604992 0.8842511999999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1306 1.95369833697 0 78.2910009 0.6884986499999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1307 2.44133377904 0 78.239500475 0.7657492875000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1308 2.29217450437 0 78.24800045 0.7529993249999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1309 2.09572432924 0 78.159998775 0.885001837499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1310 2.11514698074 0 78.38300025 0.5504996250000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1311 2.29217450437 0 78.30849875 0.6622518750000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1312 2.67240958712 0 78.24099945 0.7635008249999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1313 2.3901980036 0 78.1964996 0.8302506000000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1314 2.80948259281 0 78.196999525 0.8295007125000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1315 2.97779876864 0 77.499500075 1.8757498874999996 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1316 3.59736474838 0 77.560998975 1.783501537499994 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1317 3.70240971999 0 77.5669998 1.7745002999999926 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1318 2.98884796399 0 77.60950015 1.7107497749999965 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1319 2.97779876864 0 77.5099994 1.8600009000000028 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1320 3.70240971999 0 77.66250015 1.6312497750000006 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 
------ -+++++ -conf1321 1.72755765819 0 78.1795002 0.8557496999999898 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1322 2.17132451316 0 78.100500225 0.974249662499993 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1323 3.37566443263 0 77.740001125 1.5149983124999906 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1324 2.57346981923 0 78.0845013 0.9982480500000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1325 2.4337406276 0 77.568500025 1.7722499624999912 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1326 2.7795432921 0 77.8360002 1.3709996999999987 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1327 3.77591555364 0 77.44349995 1.9597500749999952 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1328 3.48520639466 0 78.0400009 1.0649986500000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1329 1.96561700982 0 77.7650008 1.4774988000000064 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1330 3.53240628779 0 77.519998525 1.8450022124999919 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1331 3.1738081787 0 77.63000035 1.6799994749999954 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1332 
3.49454519355 0 77.520499075 1.8442513874999946 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1333 2.86683022571 0 77.6179995 1.6980007500000056 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1334 2.53160205673 0 77.963498775 1.1797518374999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1335 2.41443054714 0 77.595499575 1.7317506374999923 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1336 2.98884796399 0 77.4784996 1.9072505999999905 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1337 2.98396322778 0 77.7334995 1.5247507500000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1338 3.77591555364 0 77.599000525 1.7264992125000092 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1339 1.91009546227 0 77.440002 1.9649969999999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1340 2.27522664257 0 77.9180006 1.2479991000000012 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1341 3.22083792723 0 77.550500025 1.7992499624999923 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1342 3.45487014523 0 77.52699985 1.8345002250000064 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1343 
2.70433982991 0 77.747000325 1.5044995124999971 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1344 3.49811600913 0 77.567999575 1.7730006374999974 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1345 3.58371362677 0 78.0034993 1.1197510499999979 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1346 2.48136969546 0 77.50999955 1.8600006749999949 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1347 2.2351185922 0 77.927000625 1.2344990624999923 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1348 3.4918718623 0 77.55899955 1.7865006750000063 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1349 2.85338933385 0 77.505499775 1.8667503374999939 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1350 3.02516396272 0 77.8040009 1.4189986499999918 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1351 3.02516396272 0 77.6645 1.6282499999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1352 1.7496306648 0 78.080002 1.0049970000000101 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1353 3.28260543861 0 78.17100045 0.868499325000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1354 
3.45051734728 0 77.615999825 1.7010002624999956 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1355 2.98396322778 0 77.77050005 1.4692499250000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1356 2.72858886384 0 77.621999525 1.6920007124999898 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1357 2.76271987895 0 77.38550045 2.0467493250000075 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1358 2.6038089013 0 78.050499725 1.0492504125000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1359 2.72858886384 0 77.51250005 1.8562499250000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1360 2.6038089013 0 78.268500375 0.7222494374999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1361 3.89181483916 0 77.5769989 1.75950164999999 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1362 1.96279955207 0 77.769000475 1.4714992875000021 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1363 3.71194409149 0 77.4570005 1.9394992499999901 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1364 2.89409550228 0 77.564500175 1.7782497374999906 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ 
-conf1365 2.53864202538 0 77.92350025 1.2397496249999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1366 3.53240628779 0 77.6799991 1.6050013499999949 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1367 2.33816231791 0 77.336499475 2.120250787500005 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1368 4.48813937768 0 78.18300035 0.8504994749999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1369 4.87565925629 0 78.0275003 1.0837495500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1370 3.26850208106 0 77.29099965 2.188500524999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1371 3.52603923588 0 77.992000425 1.1369993624999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1372 2.8077544116 0 77.916000525 1.2509992124999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1373 3.45138702919 0 77.977000025 1.159499962500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1374 4.13446115525 0 77.988500575 1.142249137499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1375 2.76355621145 0 77.767000175 1.4744997374999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1376 3.10050944598 0 78.0005005 1.1242492499999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1377 3.89292123452 0 77.795000075 1.4324998874999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1378 2.6666849235 0 77.0604998 2.5342502999999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1379 3.17822794726 0 77.944999725 1.2075004124999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1380 2.63995156218 0 78.0805 1.004249999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1381 2.72831704128 0 78.0629995 1.0305007499999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1382 3.57575930671 0 77.562999725 1.780500412500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1383 2.58610616022 0 78.158999825 0.8865002625000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1384 2.67371407651 0 77.774498925 1.4632516124999952 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1385 4.47713386599 0 78.0165001 1.1002498499999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1386 3.27045362948 0 77.94099985 1.2135002250000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1387 2.50108446251 0 78.3109989 0.658501649999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 
promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1388 3.03387706496 0 77.619498875 1.6957516874999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1389 5.09612029865 0 78.0029999 1.120500149999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1390 3.89292123452 0 77.82250045 1.3912493249999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1391 4.59198750865 0 78.076499925 1.0102501125000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1392 3.76294547016 0 77.885000625 1.2974990624999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1393 4.69591915468 0 78.0605003 1.0342495499999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1394 4.59661176964 0 78.0494995 1.050750750000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1395 3.35622156523 0 77.832000675 1.376998987499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1396 2.52809669417 0 78.148000525 0.9029992124999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1397 4.33470175092 0 78.211999275 0.8070010875000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1398 3.22311222914 0 77.279000125 2.206499812500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1399 3.90290717352 0 78.24649945 0.7552508249999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 6 
-6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1400 2.76216260511 0 77.7119996 1.557000600000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1401 3.6515641406 0 77.9660004 1.175999400000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1402 2.77222805663 0 77.897500075 1.2787498875000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1403 2.87435224664 0 77.01950065 2.5957490250000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1404 3.41779112394 0 77.96299935 1.180500974999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1405 3.91238549312 0 77.8559998 1.3410002999999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1406 4.76952130091 0 78.0144995 1.1032507500000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1407 4.67427943592 0 78.152000275 0.896999587499991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1408 2.39521498147 0 77.767500725 1.4737489124999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1409 3.76294547016 0 77.864499725 1.3282504124999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1410 4.36510071125 0 78.188500025 0.8422499625000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1411 3.27045362948 0 77.41499965 2.002500525000002 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1412 2.48971602595 0 77.766499475 1.475250787499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1413 1.7855629355 0 77.799995 1.4250075000000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1414 4.66234332773 0 77.9869996 1.1445005999999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1415 4.52821122249 0 78.0979993 0.9780010500000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1416 4.27983318032 0 78.155000475 0.8924992875000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1417 2.9784464594 0 77.4359997 1.9710004500000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1418 2.84775205688 0 78.013999875 1.1040001875000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1419 4.50808625337 0 78.180500775 0.8542488375000019 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1420 4.33470175092 0 78.177999575 0.8580006374999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1421 2.27314934897 0 76.994500125 2.6332498124999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1422 3.05997821259 0 77.4870001 1.8944998499999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise 
swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1423 4.98028095379 0 78.02849925 1.0822511250000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1424 4.79205397957 0 78.038500425 1.0672493625000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf1425 4.90008498647 0 78.083500225 0.9997496625000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt deleted file mode 100644 index 970a2f6b4d0638f726e9ad8c273484829bece489..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ /dev/null @@ -1,5850 +0,0 @@ -+++++ -conf1 1 0 78.75 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf1 2.01998340478 0 78.6258339 0.5241660999999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf2 2.3847866076 0 78.4283332 0.48250020000000404 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf3 2.70007394171 0 78.39416715 0.5337492749999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf4 2.40933283482 0 78.3983333 0.5275000499999933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf5 2.47665743514 0 78.45166625 0.6983337499999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf6 2.39521498147 0 78.377499225 0.558751162500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf7 2.03574912892 0 78.300833675 0.6737494874999896 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf8 1.66405270953 0 78.8608336 0.2891663999999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf9 2.12995187868 0 78.42249945 0.4912508249999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf10 2.12252361347 0 78.3466679 0.6049981500000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf11 2.06569084995 0 78.4441662 0.45875070000000306 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf12 3.14140798624 0 78.35166645 0.5975003250000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf13 2.24152130544 0 78.3425 0.6112499999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf14 2.29313412528 0 78.239166025 0.7662509624999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf15 2.20258261036 0 78.458333525 0.6916664749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf16 2.29217450437 0 78.475834475 0.6741655250000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ 
-conf17 1.92405135546 0 78.35416605 0.5937509249999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf18 2.11514698074 0 78.5400008 0.6099991999999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf19 1.87210017687 0 78.447499675 0.45375048749999536 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf20 3.20613284165 0 78.374166925 0.5637496125000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf21 2.04639704852 0 78.4675 0.6824999999999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf22 2.96009700474 0 78.387500825 0.5437487624999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf23 3.369849424 0 78.32916615 0.6312507749999909 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf24 2.89256716615 0 78.33750065 0.6187490250000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf25 2.61874738541 0 78.4550001 0.6949998999999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf26 2.20258261036 0 78.57083385 0.5791661500000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf27 2.06257943045 0 78.325834275 0.6362485874999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu 
softmax fp16 1 ------ -+++++ -conf28 2.89256716615 0 78.3883325 0.5425012499999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf29 1.99206710006 0 78.3549999 0.5925001500000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf30 2.3343756992 0 78.341666825 0.6124997624999935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf31 2.29217450437 0 78.297499675 0.6787504875000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf32 2.50154135982 0 78.3100002 0.6599996999999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf33 1.7977525785 0 78.7299997 0.42000030000000665 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf34 3.1306351361 0 78.3141674 0.6537488999999965 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf35 2.64377420873 0 78.43166655 0.47750017499999586 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf36 2.24152130544 0 78.42999965 0.48000052500000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf37 2.43050085973 0 78.462500325 0.6874996750000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf38 2.68419601838 0 78.230833275 0.7787500875000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf39 2.14833063686 0 78.574166375 0.5758336249999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf40 2.1432868672 0 78.3550005 0.5924992499999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf41 1.68452312305 0 78.73333 0.41667000000000487 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf42 1.65600317448 0 78.133331 0.9250035000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf43 2.1923561844 0 78.4075006 0.5137490999999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf44 1.89424199196 0 78.535832925 0.614167075000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf45 3.42248933877 0 78.326665675 0.6350014874999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf46 1.8683963672 0 78.45916675 0.6908332500000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf47 1.94813942115 0 78.3724997 0.5662504499999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf48 1.95369833697 0 78.4008341 0.523748850000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 
------ -+++++ -conf49 1.76031414454 0 78.810001 0.3399990000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf50 3.542000097 0 78.274166525 0.7137502125000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf51 2.14664674472 0 78.50083355 0.6491664500000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf52 1.92405135546 0 78.580833475 0.5691665249999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf53 1.98628801958 0 78.225000025 0.7874999625000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf54 1.58691558324 0 78.433334 0.47499899999999684 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf55 1.99206710006 0 78.526666425 0.6233335749999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf56 1.58691558324 0 78.433334 0.47499899999999684 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf57 2.6038089013 0 78.41666655 0.5000001749999967 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf58 2.86533054339 0 78.429166875 0.4812496874999894 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf59 2.5635932838 0 78.394166475 0.5337502874999913 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf60 1.63231514248 0 78.133331 0.9250035000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf61 2.07194193579 0 78.55666715 0.5933328500000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf62 2.59394423726 0 78.4616668 0.6883331999999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf63 2.44133377904 0 78.40999965 0.5100005249999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf64 2.31935108241 0 78.418333275 0.49750008750000774 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf65 2.28186154535 0 78.2866669 0.6949996499999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf66 1.88771378107 0 78.396666025 0.5300009624999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf67 3.28260543861 0 78.332499925 0.62625011250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf68 2.93158007413 0 78.3800001 0.5549998499999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf69 2.24152130544 0 78.269999725 0.7200004124999921 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf70 2.17046411412 0 78.226666425 
0.7850003624999928 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf71 2.3901980036 0 78.414167425 0.5037488624999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf72 2.20258261036 0 78.42083345 0.49374982499999476 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf73 1.79680900793 0 78.505833075 0.6441669250000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf74 2.03862830664 0 78.6199985 0.5300015000000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf75 1.8569945332 0 78.56750045 0.5824995500000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf76 1.95369833697 0 78.539165875 0.6108341250000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf77 2.06569084995 0 78.4458326 0.4562510999999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf78 2.18361621336 0 78.551667025 0.5983329749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf79 2.14664674472 0 78.43916705 0.466249425000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf80 1.92405135546 0 78.724165975 0.4258340249999947 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf81 3.24718776399 0 78.3708334 0.5687499000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf82 2.83566297806 0 78.381667 0.5524995000000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf83 2.1510303661 0 78.436666475 0.47000028750000666 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf84 2.41124195235 0 78.447500425 0.4537493624999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf85 1.88123041257 0 78.49250045 0.6574995500000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf86 2.14664674472 0 78.5775003 0.572499700000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf87 1.79680900793 0 78.694167075 0.4558329250000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf88 2.02237001209 0 78.67083405 0.4791659500000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf89 1.82360897249 0 78.663332725 0.4866672749999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf90 2.18361621336 0 78.354166375 0.5937504374999918 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf91 1.68618253461 0 78.7716667 0.37833330000000276 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf92 3.14140798624 0 78.355833 0.5912504999999939 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf93 2.24152130544 0 78.4691669 0.6808330999999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf94 3.48564996739 0 78.33583295 0.6212505750000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf95 2.49016877877 0 78.4016665 0.5225002499999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf96 2.16634367244 0 78.366666775 0.5749998375000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf97 1.60552156231 0 78.133339 0.9249914999999902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf98 2.48926343774 0 78.3775003 0.5587495500000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf99 3.11142905832 0 78.33083305 0.6287504250000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf100 3.08305776402 0 78.398333325 0.5275000125000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf101 2.59640338923 0 78.502499925 0.647500075000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf102 2.36440403604 0 78.4100001 0.5099998499999927 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf103 3.06545828495 0 78.413333375 0.5049999375000098
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf104 1.561580129 0 78.366661 0.5750085000000098
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 33 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf105 2.10377358491 0 78.574166025 0.5758339750000033
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf106 3.28260543861 0 78.339167025 0.6162494625000079
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf107 2.51763860959 0 78.321666775 0.6424998375000044
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf108 2.24152130544 0 78.37416705 0.5637494250000046
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf109 1.88123041257 0 78.30083255 0.6737511750000067
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf110 1.99206710006 0 78.47083355 0.6791664500000053
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf111 2.67606536954 0 78.4125008 0.5062487999999945
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf112 1.63231514248 0 78.0 1.125
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf113 2.16360539179 0 78.3991667 0.5262499500000075
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf114 2.3343756992 0 78.379999675 0.5550004875000099
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf115 2.23420690358 0 78.490833225 0.6591667749999971
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf116 2.96586709623 0 78.414166075 0.5037508875000043
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf117 2.58903988183 0 78.322500425 0.6412493624999982
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf118 3.15989298643 0 78.379166025 0.5562509624999947
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf119 2.59492733884 0 78.2433332 0.7600002000000075
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf120 3.02884415997 0 78.363333675 0.5799994874999896
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf121 2.80948259281 0 78.390000625 0.5399990625000015
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf122 2.07194193579 0 78.608333025 0.5416669750000068
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf123 2.0051937949 0 78.435834225 0.47124866250000963
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf124 1.561580129 0 78.333336 0.6249959999999959
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf125 2.10636242153 0 78.525833925 0.6241660749999994
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf126 2.66590618726 0 77.763333475 1.479999787500006
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf127 3.02516396272 0 77.884999975 1.2975000375000008
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf128 2.27522664257 0 78.025831825 1.086252262500004
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf129 2.76271987895 0 77.709167625 1.5612485624999906
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf130 3.40081490105 0 77.92333335 1.2399999750000106
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf131 2.79400512644 0 78.079165625 1.0062515624999975
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf132 3.58371362677 0 78.24749945 0.7537508249999902
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf133 2.16275109817 0 77.8516661 1.3475008499999959
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf134 3.02516396272 0 78.091666625 0.987500062499997
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf135 2.51763860959 0 78.29666675 0.679999875
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf136 2.98884796399 0 77.69749905 1.578751424999993
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf137 2.85338933385 0 77.795000675 1.4324989875000043
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf138 2.38915544409 0 77.653334675 1.6449979875000054
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf139 2.24538057124 0 77.54916775 1.8012483750000072
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf140 3.70240971999 0 78.0641668 1.028749800000007
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf141 3.542000097 0 78.251666625 0.7475000625000021
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf142 2.81381236756 0 77.702501075 1.571248387499999
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf143 2.16275109817 0 77.61916605 1.696250924999994
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf144 2.17132451316 0 78.177498975 0.858751537499991
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf145 2.14731998471 0 77.715833625 1.5512495624999971
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf146 3.30478754695 0 77.8433334 1.3599998999999912
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf147 2.89409550228 0 77.705832625 1.566251062500001
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf148 3.11638574781 0 77.875000025 1.3124999624999916
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf149 2.27126418908 0 77.726667275 1.5349990875000046
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf150 1.540499209 0 78.299995 0.6750075000000066
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf151 2.9094681628 0 78.252499575 0.7462506374999975
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf152 2.29505577961 0 77.8708336 1.3187496000000039
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf153 3.84266973008 0 77.9358333 1.2212500500000019
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf154 2.73950635808 0 77.739999725 1.5150004124999938
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf155 3.14863117051 0 77.776665475 1.4600017874999978
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf156 2.75882367755 0 78.10583375 0.9662493749999967
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf157 1.60552156231 0 77.333336 2.124995999999996
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 35 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf158 2.17563676084 0 77.3866672 2.0449991999999924
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf159 3.09001480855 0 78.13416635 0.9237504749999985
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf160 1.96279955207 0 78.240832825 0.7637507625000026
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf161 2.34818605648 0 77.6583338 1.6374993000000089
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf162 3.22083792723 0 77.988334125 1.1424988125000084
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf163 2.41549528692 0 77.5875 1.7437499999999915
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf164 2.6215047202 0 77.724999925 1.5375001124999912
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf165 1.61737349131 0 77.26667 2.2249949999999927
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 35 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf166 1.8683963672 0 78.435000275 0.4724995875000104
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf167 2.83566297806 0 78.3624994 0.5812508999999935
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf168 2.45908953163 0 78.142500675 0.9112489875000094
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf169 2.78803206719 0 77.6300002 1.6799997000000033
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf170 2.34818605648 0 77.793333025 1.4350004625000068
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf171 3.30838052095 0 78.097500625 0.9787490625000075
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf172 2.68946786964 0 77.6158327 1.7012509500000021
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf173 2.89409550228 0 77.7600006 1.4849991000000031
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf174 2.74472283119 0 77.914166025 1.2537509624999998
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf175 3.18710451669 0 78.14500085 0.9074987249999964
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf176 2.29428673152 0 78.021665925 1.0925011125000097
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf177 2.93786958446 0 77.71416625 1.5537506249999922
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf178 2.00828162094 0 77.7725002 1.4662497000000059
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf179 3.25529306887 0 77.90166585 1.2725012249999992
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf180 2.72858886384 0 77.793332925 1.4350006124999979
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf181 2.16104453321 0 77.90333325 1.2700001249999957
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf182 2.17823234081 0 77.578333675 1.7574994875000058
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf183 2.5540304249 0 78.0050003 1.117499549999991
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf184 2.86683022571 0 78.04083325 1.0637501249999914
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf185 1.74073411989 0 77.433342 1.9749870000000058
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf186 3.11638574781 0 77.655000325 1.6424995124999953
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf187 3.18710451669 0 77.884166 1.29875100000001
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf188 1.62843286633 0 78.266663 0.7250055000000089
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf189 2.20915635682 0 78.098333725 0.977499412499995
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf190 2.54477507927 0 78.166666375 0.8750004374999918
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf191 2.51763860959 0 78.17833355 0.8574996749999926
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf192 3.06580144126 0 77.67583365 1.611249524999998
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf193 3.53925369518 0 78.084999 0.9975015000000056
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf194 2.4337406276 0 77.8741665 1.3137502499999982
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf195 3.72963656481 0 77.89333295 1.285000574999998
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf196 2.16275109817 0 77.9108336 1.2587495999999945
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf197 2.84007388684 0 78.249166825 0.7512497624999952
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf198 3.08305776402 0 78.3541669 0.5937496500000066
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf199 1.67627481734 0 77.300003 2.1749954999999943
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf200 3.84266973008 0 77.789166675 1.4412499874999938
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf201 2.00445999726 0 77.79916735 1.426248974999993
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf202 3.24180661389 0 78.29750025 0.6787496250000018
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf203 2.5540304249 0 78.192500725 0.8362489124999968
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf204 2.64914454991 0 78.190833575 0.8387496375000012
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf205 3.59736474838 0 78.001666825 1.1224997624999986
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf206 2.00445999726 0 77.709167125 1.56124931250001
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf207 2.23238575577 0 77.580833325 1.7537500124999994
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf208 2.65814376706 0 78.244999925 0.7575001124999972
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf209 2.97779876864 0 77.65083345 1.64874982500001
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf210 3.42248933877 0 78.273333075 0.7150003875000053
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf211 2.98396322778 0 78.078332875 1.0075006874999985
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf212 3.13457659127 0 77.9049995 1.2675007499999964
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf213 2.20630884648 0 77.829166975 1.3812495374999898
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf214 2.35242170136 0 78.25166715 0.7474992749999956
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf215 3.48520639466 0 78.25583275 0.7412508750000057
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf216 3.11638574781 0 77.74666595 1.5050010750000098
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf217 3.39701846598 0 78.02666625 1.0850006249999922
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf218 2.23238575577 0 77.995833575 1.131249637499991
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf219 2.09813263107 0 77.979165975 1.1562510374999988
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf220 2.98396322778 0 77.924999225 1.2375011625000099
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf221 2.44242238448 0 77.7983329 1.4275006499999918
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf222 2.66590618726 0 77.830834025 1.3787489624999978
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf223 2.43612195203 0 78.434167675 0.47374848750000353
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf224 3.26928242088 0 77.946667175 1.2049992374999974
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf225 2.59640338923 0 78.41833385 0.49749922500000565
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf226 1.94883255504 0 78.098333575 0.9774996375000029
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf227 2.01775107059 0 77.572500275 1.7662495875000062
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf228 2.64786391927 0 77.929166775 1.2312498375000018
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf229 2.46129952706 0 77.86750025 1.3237496249999907
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf230 1.92906574394 0 77.800833175 1.423750237500009
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf231 2.17132451316 0 78.174166425 0.8637503625000065
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf232 2.93786958446 0 77.677500025 1.6087499624999992
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf233 2.11107096401 0 77.924165825 1.2387512624999957
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf234 3.35910310464 0 78.12250065 0.9412490249999905
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf235 2.24980746522 0 77.8475005 1.353749250000007
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf236 2.89104044336 0 77.789999725 1.440000412499998
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf237 2.48136969546 0 77.885832625 1.2962510624999908
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf238 1.74018108892 0 77.795832775 1.4312508375000093
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 35 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf239 2.07523892514 0 77.334167475 2.1237487874999985
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 31 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf240 3.59736474838 0 78.0583328 1.0375007999999966
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf241 1.66455840456 0 77.200005 2.3249924999999934
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 35 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf242 3.59736474838 0 77.869166975 1.3212495375000017
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf243 2.71748837209 0 78.194166325 0.8337505125000035
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf244 2.4524832936 0 77.79416675 1.4337498749999966
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf245 1.80975096023 0 77.8899994 1.2900009000000097
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf246 3.64088439105 0 77.9983331 1.1275003500000054
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf247 1.99424293451 0 77.23333 2.2750050000000073
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf248 2.92188437688 0 77.82999985 1.3800002250000105
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf249 3.36364124845 0 78.208333175 0.8125002374999895
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf250 1.69337635738 0 77.699997 1.5750045000000057
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf251 2.3912414734 0 77.454999025 1.9425014624999903
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf252 2.2351185922 0 77.888333 1.2925004999999956
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf253 2.72858886384 0 77.5358331 1.8212503499999926
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf254 2.22694008233 0 77.8283333 1.3825000500000044
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf255 2.19569574585 0 77.805000325 1.417499512500008
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf256 3.01849555693 0 77.5833332 1.7500002000000023
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf257 2.19569574585 0 77.389999575 2.0400006374999933
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf258 2.01775107059 0 77.844166425 1.358750362500004
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf259 2.7795432921 0 78.004999925 1.1175001124999895
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf260 2.16446036058 0 78.245833625 0.7562495624999954
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf261 2.58050173724 0 77.935832575 1.2212511374999906
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf262 2.90329954283 0 77.738334525 1.5174982125000014
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf263 3.45051734728 0 77.903333475 1.269999787500005
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf264 3.24411063565 0 78.197500075 0.8287498875000097
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf265 2.83830787766 0 77.790834 1.4387489999999943
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf266 2.04182004566 0 78.04249965 1.0612505250000055
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf267 2.36542510121 0 77.42583335 1.9862499749999927
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf268 2.24980746522 0 77.799999375 1.425000937500002
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf269 1.89227879259 0 77.91749935 1.248750975
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf270 1.88123041257 0 78.80333275 0.3466672500000044
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf271 3.07993730408 0 78.325000075 0.6374998874999918
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf272 2.7795432921 0 78.15666565 0.8900015250000095
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf273 2.24796080644 0 77.7641655 1.4787517499999936
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf274 2.25629473251 0 77.3750008 2.0624988000000073
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf275 3.33253479352 0 78.373332825 0.5650007624999915
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf276 2.48136969546 0 77.792499675 1.436250487499997
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf277 3.46799468161 0 77.8525001 1.3462498499999995
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf278 2.75161695447 0 78.2841667 0.6987499499999998
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf279 2.78803206719 0 77.8383327 1.3675009500000073
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf280 3.06580144126 0 77.6600006 1.6349990999999946
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf281 2.95307508795 0 77.830833225 1.3787501624999905
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf282 2.0579298361 0 77.759999325 1.4850010125000068
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 35 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf283 3.44574192026 0 78.287500025 0.6937499625000001
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf284 2.16275109817 0 77.733333075 1.5250003874999933
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf285 3.72963656481 0 77.990000175 1.1399997374999913
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf286 2.0051937949 0 78.415000175 0.5024997374999955
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf287 2.07823096988 0 77.93916665 1.2162500249999937
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf288 1.96561700982 0 77.73000035 1.5299994750000039
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf289 2.29505577961 0 77.8333338 1.3749992999999918
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf290 3.66721064524 0 77.8116667 1.4074999499999947
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf291 3.82389130321 0 77.93 1.2299999999999898
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf292 2.44460251046 0 78.28166635 0.7025004750000079
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf293 2.15594095941 0 77.4183335 1.9974997499999958
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 34 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf294 2.4337406276 0 77.829165775 1.3812513374999895
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 7
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf295 2.98884796399 0 77.9175001 1.248749850000003
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf296 2.27522664257 0 78.02666625 1.0850006249999922
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf297 2.83419571826 0 78.140833925 0.9137491125000068
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf298 2.98104004245 0 78.109166525 0.9612502124999907
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf299 2.22694008233 0 77.81166655 1.4075001750000027
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf300 3.59076159103 0 77.999166425 1.1262503625000022
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf301 1.59477017142 0 78.299995 0.6750075000000066
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf302 2.99538587737 0 78.17166735 0.8674989749999895
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf303 2.24980746522 0 78.02500095 1.0874985749999908
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1
-5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf304 3.82389130321 0 77.728333225 1.5325001624999999
-1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 promise swing_level 6
-6 gpu mul fp16 1 add fp16 1
-7 gpu softmax fp16 1
------
-+++++
-conf305 3.06580144126 0 77.6491664 1.6512504000000021
-1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf306 2.6708458791 0 78.36916735 0.5712489750000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf307 1.75142089738 0 78.28416715 0.6987492749999973 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf308 2.63512538337 0 78.245000275 0.757499587500007 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf309 2.22694008233 0 77.810833725 1.4087494125000077 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf310 2.90329954283 0 77.685000025 1.5974999625000095 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf311 3.48564996739 0 78.304166975 0.6687495374999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf312 2.46129952706 0 77.8291661 1.3812508500000078 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf313 3.0234941006 0 77.720000225 1.5449996624999898 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf314 2.78803206719 0 77.630832525 1.6787512124999964 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf315 1.83153605016 0 78.329999325 0.6300010124999957 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 
gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf316 2.98396322778 0 77.99583315 1.1312502750000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf317 2.98396322778 0 77.8950001 1.2824998499999936 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf318 3.45051734728 0 78.096665625 0.9800015625 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf319 2.87133870651 0 77.78500025 1.447499625000006 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf320 2.85338933385 0 77.722500575 1.541249137500003 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf321 3.49811600913 0 78.051667525 1.0474987125000013 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf322 3.11638574781 0 77.666666675 1.6249999874999972 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf323 3.26928242088 0 77.822500225 1.3912496625000017 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf324 3.77643621697 0 77.9308329 1.228750650000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf325 3.13457659127 0 78.079166175 1.0062507375000038 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf326 2.52111501064 0 77.8916662 1.2875006999999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf327 2.3901980036 0 78.4191667 0.49624994999999217 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf328 2.5540304249 0 77.8974997 1.278750450000004 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf329 2.08376527652 0 78.076667975 1.0099980374999902 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf330 2.82425847867 0 77.597498625 1.7287520625 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf331 2.95498685009 0 78.008332425 1.1125013624999909 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf332 3.66721064524 0 77.970833175 1.1687502375000065 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf333 2.24980746522 0 77.759166525 1.4862502125000034 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf334 2.11760015464 0 77.669999875 1.620000187499997 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf335 2.50405728196 0 77.708333375 1.5624999375000073 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf336 3.71194409149 0 77.987499775 1.1437503374999949 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf337 2.70433982991 0 77.603333275 1.7200000875000043 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ 
-+++++ -conf338 2.6262810908 0 77.976667425 1.1599988624999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf339 2.25350989972 0 77.9949997 1.132500450000009 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf340 3.35910310464 0 77.893333275 1.285000087499995 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf341 3.77591555364 0 77.8466659 1.3550011499999925 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf342 3.63363848948 0 77.6966664 1.580000400000003 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf343 2.95498685009 0 78.0666676 1.0249985999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf344 2.97779876864 0 77.59999905 1.7250014250000092 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf345 3.63363848948 0 77.896666575 1.2800001375000036 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf346 3.01849555693 0 77.669999725 1.620000412500005 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf347 1.69337635738 0 78.233337 0.7749944999999911 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf348 3.45487014523 0 78.031666225 1.0775006625000074 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf349 2.99342150206 0 77.65666695 
1.6399995749999974 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf350 3.05895287958 0 77.964999825 1.17750026249999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf351 3.30478754695 0 77.97250065 1.166249024999999 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf352 3.16135199481 0 77.633332675 1.6750009874999918 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf353 2.80373024506 0 78.070832475 1.0187512874999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf354 2.09813263107 0 78.0083335 1.1124997499999907 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf355 3.19453962237 0 77.770000625 1.4699990625000083 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf356 2.9094681628 0 77.677500475 1.6087492874999967 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf357 2.64786391927 0 77.63583395 1.6712490749999915 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf358 2.17823234081 0 77.741666975 1.5124995374999983 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf359 2.1432868672 0 78.15833395 0.8874990750000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 
add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf360 2.11514698074 0 78.479999675 0.6700003249999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf361 1.90015610771 0 77.813332925 1.4050006125000039 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf362 2.99538587737 0 78.02499965 1.087500525000003 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf363 2.48881101405 0 78.280000425 0.7049993625000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf364 1.85888516523 0 77.5 1.875 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf365 3.71194409149 0 77.8299993 1.3800010500000042 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf366 2.08281444583 0 77.933333425 1.224999862499999 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf367 2.66590618726 0 78.1133333 0.9550000500000095 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf368 2.24888375674 0 77.844165625 1.3587515624999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf369 3.37566443263 0 78.118332875 0.9475006875000105 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf370 3.89181483916 0 77.944167275 1.2087490875000029 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf371 2.3343756992 0 78.34333455 0.6099981750000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf372 2.73950635808 0 77.709167125 1.56124931250001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 31 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf373 2.73376380311 0 77.810000475 1.409999287500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf374 2.57467948341 0 77.89083425 1.2887486250000038 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf375 1.89424199196 0 78.485832075 0.6641679249999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf376 3.49454519355 0 77.8066671 1.4149993500000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf377 1.76439678846 0 77.765000375 1.4774994375000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf378 2.04029892443 0 77.970833925 1.1687491125000093 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf379 3.49454519355 0 77.930000375 1.2299994374999912 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf380 3.24718776399 0 78.372501025 0.566248462499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf381 3.11638574781 0 77.618333675 1.6974994874999965 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf382 2.57467948341 0 78.11749985 0.9487502249999977 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf383 3.01849555693 0 77.804999725 1.4175004124999973 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf384 2.40299088586 0 77.9716665 1.1675002500000033 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf385 3.20613284165 0 78.3483334 0.602499899999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf386 2.89104044336 0 77.71583315 1.5512502750000081 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf387 2.72018747284 0 78.090000325 0.9899995124999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf388 3.06580144126 0 77.6383333 1.667500050000001 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf389 2.39458670776 0 78.169167125 0.871249312499998 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf390 3.25529306887 0 78.100000275 0.974999587500001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf391 2.11694541871 0 78.165834275 0.8762485875000081 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf392 2.0941218638 0 77.7858332 1.4462502000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 
-4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf393 3.19453962237 0 77.5908335 1.7387497499999967 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf394 2.03272718838 0 77.99666555 1.1300016749999955 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf395 3.16135199481 0 77.81666755 1.399998674999992 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf396 1.9453718091 0 78.045000025 1.0574999625000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf397 3.30838052095 0 77.929999925 1.2300001124999937 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf398 1.95369833697 0 78.654167425 0.4958325750000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf399 2.91566305143 0 78.091666425 0.9875003625000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf400 2.79400512644 0 77.817500475 1.3987492874999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf401 4.42219194672 0 78.2383339 0.767499149999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf402 3.01949394303 0 77.418333025 1.9975004625000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf403 3.1705014109 0 77.385833225 2.0462501625000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 
pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf404 4.26782827201 0 78.063334375 1.029998437500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf405 2.7974298081 0 77.765000075 1.4774998874999952 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf406 4.66234332773 0 78.051667025 1.0474994624999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf407 3.44574192026 0 78.334166525 0.6237502124999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf408 3.91238549312 0 77.97583325 1.1612501250000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf409 5.09612029865 0 78.014167025 1.1037494624999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf410 2.57346981923 0 77.40749955 2.0137506750000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf411 2.79087319279 0 77.4591663 1.9362505499999898 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf412 2.24225538055 0 77.60666765 1.7149985249999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf413 4.35330525145 0 78.2025004 0.8212493999999921 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf414 3.6355679084 0 78.34750005 0.6037499250000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add 
fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf415 2.09813263107 0 77.782499825 1.4512502624999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf416 3.69591437391 0 77.8908339 1.288749149999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf417 3.70341103092 0 77.961666825 1.182499762500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf418 4.50808625337 0 78.152499875 0.8962501874999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf419 3.6350853616 0 77.693333625 1.584999562500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf420 3.4088575296 0 77.734167475 1.52374878749999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf421 2.84775205688 0 77.76833325 1.4725001250000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf422 3.42248933877 0 78.276666475 0.7100002875000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf423 2.63995156218 0 78.009166925 1.1112496124999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf424 3.14537663121 0 77.1300003 2.429999549999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf425 4.43580061952 0 78.145833225 0.9062501624999939 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf426 3.29564085979 0 78.164166575 0.8787501375000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise 
swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf427 3.36364124845 0 78.29666615 0.6800007750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf428 3.89292123452 0 77.83166695 1.3774995750000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf429 2.32921384149 0 77.6116661 1.7075008500000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf430 1.99715145162 0 78.1408336 0.9137496000000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf431 2.38209008774 0 77.344166925 2.108749612500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf432 5.01218173084 0 78.131665725 0.9275014124999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf433 3.42634295097 0 77.703333125 1.5700003124999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf434 3.96334183535 0 77.94666705 1.204999424999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf435 2.76355621145 0 77.754999925 1.4925001124999895 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf436 3.542000097 0 78.261667625 0.7324985624999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf437 3.55210680761 0 77.912499025 1.2562514624999963 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul 
fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf438 4.60279195815 0 78.219165625 0.7962515624999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf439 3.78112865648 0 77.954999725 1.19250041250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf440 3.58371362677 0 78.213333575 0.8049996375000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf441 1.95648970844 0 77.958332825 1.187500762500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf442 3.87145936777 0 78.21166725 0.8074991249999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf443 2.51025423632 0 77.799999 1.4250015000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf444 2.6313276887 0 77.058333425 2.537499862499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf445 3.09350511825 0 78.15583305 0.8912504250000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf446 3.23338646124 0 77.3483334 2.102499899999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf447 4.59661176964 0 78.16250005 0.8812499249999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf448 2.10636242153 0 77.625834225 1.6862486624999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf449 3.28615031572 0 77.718333425 1.547499862500004 -1 gpu conv fp16 1 
add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf450 2.06631426065 0 78.1050001 0.9674998500000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf451 2.92157268012 0 77.90666715 1.2649992749999939 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf452 2.76216260511 0 77.796666575 1.430000137499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf453 3.22311222914 0 77.316665925 2.150001112500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf454 3.12848942641 0 77.253333325 2.2450000125000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf455 4.59198750865 0 78.0950007 0.9824989500000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf456 3.43063493306 0 78.29000005 0.6899999249999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf457 3.1705014109 0 77.2558326 2.2412510999999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf458 2.57346981923 0 78.114167225 0.9537491624999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf459 4.05728597619 0 78.259167625 0.7362485624999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf460 3.17822794726 0 78.03166725 1.0774991250000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 
add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf461 4.13446115525 0 78.10249875 0.9712518750000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf462 3.07993730408 0 78.222501075 0.7912483875000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf463 3.31960879381 0 77.75333405 1.4949989249999902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf464 3.06923723471 0 78.10749925 0.9637511249999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf465 3.15989298643 0 78.4099997 0.5100004499999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf466 1.88381840849 0 78.068333425 1.0224998624999913 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf467 2.14345461127 0 77.783333975 1.4499990374999925 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf468 3.5433748969 0 78.10083375 0.9737493749999899 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf469 2.48069541785 0 77.953334375 1.1949984375000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf470 2.61724582801 0 78.066667275 1.0249990874999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf471 3.13780880773 0 77.38166675 2.0524998750000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf472 3.14537663121 0 78.0433336 
1.0599996000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf473 2.94386966745 0 77.93666765 1.219998524999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf474 4.87565925629 0 78.070832475 1.0187512874999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf475 2.37877965366 0 77.91666685 1.2499997250000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf476 2.37444864695 0 77.638333925 1.6674991125000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf477 3.11780484681 0 77.73833275 1.5175008750000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf478 2.35242170136 0 78.116666425 0.950000362499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf479 2.29948783073 0 77.70083405 1.5737489250000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf480 4.39310849558 0 78.1566664 0.890000399999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf481 2.98396322778 0 78.04500105 1.0574984250000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf482 2.67371407651 0 77.9333336 1.2249996000000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf483 3.13565325662 0 77.197499825 2.3287502625000087 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf484 2.6262810908 0 78.095832075 0.9812518874999938 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf485 3.14140798624 0 78.30083365 0.6737495249999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf486 3.76708619178 0 77.969999475 1.1700007875000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf487 4.28586099646 0 78.05083425 1.0487486250000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf488 4.21982434853 0 78.197500425 0.8287493624999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf489 4.47713386599 0 78.158333425 0.8874998625000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf490 2.58610616022 0 78.057499875 1.0387501874999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf491 2.26900612048 0 77.415832725 2.001250912499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf492 3.02482984275 0 77.2850007 2.1974989500000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf493 1.67627481734 0 77.466667 1.9249994999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf494 3.40081490105 0 78.062499825 1.031250262499995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 
promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf495 4.76952130091 0 78.081666625 1.0025000625000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf496 3.82870249017 0 77.916667575 1.249998637499992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf497 4.50808625337 0 78.165832375 0.8762514375000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf498 4.58200012548 0 78.2566666 0.740000099999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf499 3.26850208106 0 77.291665975 2.187501037499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf500 3.27045362948 0 77.225833075 2.2862503875000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf501 3.93656682897 0 78.18666705 0.8449994250000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf502 2.60281905984 0 77.33750125 2.118748124999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf503 1.98599994561 0 77.3616673 2.08249905000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf504 3.369849424 0 78.344999875 0.6075001875000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf505 3.99339287342 0 78.27666705 0.7099994249999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf506 2.6537650949 0 77.456666975 1.9399995374999932 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 
promise swing_level 7 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf507 3.50662601431 0 78.01749995 1.098750074999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf508 4.07904045576 0 78.19666655 0.830000174999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf509 3.96334183535 0 77.91833225 1.247501624999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf510 2.95307508795 0 78.20333425 0.8199986250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf511 3.6583930271 0 77.99583395 1.1312490749999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf512 4.37206912378 0 78.210834 0.8087489999999917 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf513 3.11142905832 0 78.360000425 0.5849993625000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf514 4.49698282055 0 78.049167125 1.0512493125000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf515 3.05997821259 0 77.234166475 2.2737502875000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf516 4.06752993595 0 78.2175 0.7987499999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf517 4.69511411122 0 77.966666725 1.174999912500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf518 2.48926343774 0 78.28166715 0.7024992749999939 -1 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf519 3.05997821259 0 78.077499525 1.0087507125000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf520 4.44155567719 0 78.173332975 0.8650005375000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf521 3.1270605866 0 78.37666665 0.5600000249999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf522 3.03387706496 0 77.76833345 1.4724998249999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf523 1.93165087062 0 77.835000775 1.3724988375000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf524 3.52603923588 0 78.253333575 0.7449996375000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf525 4.623773346 0 78.157498675 0.888751987500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf526 2.89378970586 0 77.949999975 1.2000000375000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf527 4.89920842557 0 78.030833225 1.0787501625000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf528 3.27045362948 0 78.001666775 1.1224998374999942 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf529 4.36510071125 0 78.22416655 0.7887501749999899 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ 
-conf530 3.98815916413 0 78.033333725 1.0749994124999915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf531 3.31800009086 0 77.3091659 2.1612511500000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf532 2.94070867727 0 77.4541666 1.9437501000000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf533 3.5433748969 0 78.108332725 0.9625009125000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf534 2.26300649679 0 77.59666755 1.7299986749999903 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf535 2.7974298081 0 77.9049997 1.267500449999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf536 2.59985404578 0 77.8508327 1.348750950000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf537 2.32921384149 0 78.499166175 0.6508338250000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf538 3.20613284165 0 78.336666825 0.619999762500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf539 4.00390160999 0 78.094167025 0.9837494624999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf540 4.82158183139 0 78.18749925 0.8437511249999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf541 2.51025423632 0 77.735833825 1.5212492624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf542 4.42219194672 0 78.2124997 0.8062504500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf543 3.6583930271 0 78.06416625 1.0287506250000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf544 2.43006954226 0 77.7833328 1.450000800000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf545 3.08305776402 0 78.35250035 0.5962494750000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf546 2.55141313335 0 78.160000425 0.8849993624999897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf547 3.76708619178 0 78.090000175 0.9899997374999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf548 2.03862830664 0 78.6366662 0.5133338000000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf549 3.78112865648 0 77.9474998 1.2037502999999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf550 4.60279195815 0 78.17666665 0.860000024999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf551 2.85576960676 0 77.863332975 1.3300005374999913 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf552 3.13565325662 0 78.11416675 0.9537498750000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 
1 -7 gpu softmax fp16 1 ------ -+++++ -conf553 3.6399166016 0 78.323333375 0.6399999374999936 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf554 3.52150537634 0 77.719999475 1.5450007875000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf555 3.55210680761 0 77.904999525 1.2675007125000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf556 3.33253479352 0 78.405833175 0.5162502375000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf557 2.29948783073 0 77.8283334 1.382499899999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf558 2.98396322778 0 77.880832825 1.3037507625000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf559 3.91238549312 0 77.929998975 1.2300015374999944 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf560 3.69591437391 0 77.944999875 1.2075001875000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf561 3.12527637116 0 77.4366663 1.9700005500000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf562 2.76355621145 0 77.5766677 1.7599984499999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf563 3.82870249017 0 77.8450003 1.357499550000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf564 4.98028095379 0 78.07416665 
1.0137500250000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf565 3.06923723471 0 77.241666175 2.262500737499991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf566 3.24411063565 0 78.2841675 0.6987487500000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf567 4.2459932947 0 78.1333334 0.9249999000000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf568 3.20200949945 0 78.170832425 0.8687513624999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf569 3.89292123452 0 77.909999275 1.2600010874999938 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf570 2.42920736643 0 78.20916705 0.8112494249999926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf571 3.31960879381 0 77.9474999 1.2037501500000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf572 4.77617552809 0 78.180000725 0.854998912500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf573 3.17822794726 0 77.11833265 2.447501025000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf574 1.85623935679 0 77.859166525 1.3362502124999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf575 3.35622156523 0 77.91166665 1.2575000249999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 
tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf576 2.06413396769 0 78.079999725 1.00500041250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf577 2.57952953365 0 78.0558332 1.0412502000000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf578 2.55784699441 0 78.1500002 0.8999997000000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf579 3.48520639466 0 78.24666575 0.755001374999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf580 4.04291144613 0 78.278333075 0.7075003874999908 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf581 2.47777777778 0 77.960000325 1.1849995125000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf582 2.79685844492 0 78.4041662 0.5187506999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf583 3.12527637116 0 77.344999475 2.1075007875000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 4 -3 gpu conv samp 34 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ -+++++ -conf584 4.79877127275 0 78.166665675 0.8750014874999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges.txt deleted file mode 100644 index 789a4114a5a468b3634506c4016b16b8b80c9131..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges.txt +++ /dev/null @@ -1,6 +0,0 @@ --1.88164262419 2.09340954985 -0.33087718 0.3323643 -0.7782218 0.6020472 -0.978641152382 0.998945295811 --0.978641152382 0.998945295811 -0.2095158 0.33543423 -0.45020863 0.30596754 -0.999703943729 0.999930202961 --0.999703943729 0.999930202961 -0.1715614 0.17037082 -0.6519161 0.5939945 
-0.999933600426 0.999940037727 --0.999933600426 0.999940037727 -0.15575546 0.14456555 -0.55873865 0.4704539 -0.99999910593 0.999999344349 --0.99999910593 0.999999344349 -0.16108225 0.16864482 -0.22135437 0.10401678 -0.999434411526 0.999634206295 --0.999434411526 0.999634206295 -0.18183032 0.19018902 -0.07189204 0.106005594 -15.0765653801 19.4225852203 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt deleted file mode 100644 index 8e45529d84b54fc13f19e39f2da94538d54349aa..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/quant_ranges_rt.txt +++ /dev/null @@ -1,7 +0,0 @@ -1 -1.88164262419 2.09340954985 -0.33087718 0.3323643 -0.7782218 0.6020472 -0.978641152382 0.998945295811 -2 -0.978641152382 0.998945295811 -0.2095158 0.33543423 -0.45020863 0.30596754 -0.999703943729 0.999930202961 -3 -0.999703943729 0.999930202961 -0.1715614 0.17037082 -0.6519161 0.5939945 -0.999933600426 0.999940037727 -4 -0.999933600426 0.999940037727 -0.15575546 0.14456555 -0.55873865 0.4704539 -0.99999910593 0.999999344349 -5 -0.99999910593 0.999999344349 -0.16108225 0.16864482 -0.22135437 0.10401678 -0.999434411526 0.999634206295 -6 -0.999434411526 0.999634206295 -0.18183032 0.19018902 -0.07189204 0.106005594 -15.0765653801 19.4225852203 -7 0 0 0 0 0 0 0 0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/run_data/out-run-1 b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/run_data/out-run-1 deleted file mode 100644 index 7c5d66889d07891d1c53b84e9ba29b3876c841e3..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/run_data/out-run-1 +++ /dev/null @@ -1,40506 +0,0 @@ -size_in_bytes = 92928 -DEBUG: ***--- size_in_bytes = 92928 -DEBUG: Attempting to Allocate = 92928 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 363, cStride = 121, hStride = 11, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 256 -DEBUG: ***--- size_in_bytes = 256 -DEBUG: Attempting to Allocate = 256 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 64, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 1228800 -DEBUG: ***--- size_in_bytes = 1228800 -DEBUG: Attempting to Allocate = 1228800 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 1600, cStride = 25, hStride = 5, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 768 -DEBUG: ***--- size_in_bytes = 768 -DEBUG: Attempting to Allocate = 768 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 192, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 2654208 -DEBUG: ***--- size_in_bytes = 2654208 -DEBUG: Attempting to Allocate = 2654208 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 1728, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 1536 -DEBUG: ***--- size_in_bytes = 1536 -DEBUG: Attempting to Allocate = 1536 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 384, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 3538944 -DEBUG: ***--- size_in_bytes = 3538944 -DEBUG: Attempting to Allocate = 3538944 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3456, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 1024 -DEBUG: ***--- size_in_bytes = 1024 -DEBUG: Attempting to Allocate = 1024 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 256, cStride = 1, hStride = 1, 
wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 2359296 -DEBUG: ***--- size_in_bytes = 2359296 -DEBUG: Attempting to Allocate = 2359296 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2304, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 1024 -DEBUG: ***--- size_in_bytes = 1024 -DEBUG: Attempting to Allocate = 1024 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 256, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 163840 -DEBUG: ***--- size_in_bytes = 163840 -DEBUG: Attempting to Allocate = 163840 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 40960, cStride = 40960, hStride = 10, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 40 -DEBUG: ***--- size_in_bytes = 40 -DEBUG: Attempting to Allocate = 40 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -INITIALIZING GPU 0 -CREATED HANDLES 0 -INFO: -WARNING: File 'opentuner_flags' not found - - -initializing tuner .... -* LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm -- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -*LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -Read PROMISE FLAGS 0 -DONE INITIALIZING GPU 0 -INFO: Reading Quantization Ranges File... -INFO: DONE. -INFO: Reading Configuration File... -DEBUG: first_line: 2000 -DEBUG: Baseline time: 2000.000000 - -DEBUG: line: +++++ -DEBUG: t: +++++ -DEBUG: -DEBUG: line: conf1 3.86 0 79.1 0.0 -DEBUG: t: conf1 -DEBUG: t: 3.86 -DEBUG: t: 0 -DEBUG: t: 79.1 -DEBUG: t: 0.0 -DEBUG: -DEBUG: line: 1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 1 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 1 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 2 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 5 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -DEBUG: t: 3 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 9 - -DEBUG: Found conv operation
-DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -DEBUG: t: 4 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 12 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 5 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 15 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 6 gpu mul fp32 1 add fp32 1 -DEBUG: t: 6 -DEBUG: t: gpu -DEBUG: t: mul -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 19 - -DEBUG: Found mul operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 7 gpu softmax fp32 1 -DEBUG: t: 7 -DEBUG: t: gpu -DEBUG: t: softmax -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 21 - -DEBUG: Found softmax operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: ----- -DEBUG: t: ----- -DEBUG: -DEBUG: DONE. -INFO: Sorting autotuner configurations... -INFO: Done sorting. -INFO: Speedup Configurations -+++++ -conf1 3.860000 0.000000 79.099998 0.000000 -1 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 : gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 : gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 : gpu mul fp32 1 add fp32 1 -7 : gpu softmax fp32 1 ------ -DEBUG: slowdowns file not found. Initializing slowdowns randomly. -*LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -* LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm -- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -WARNING: pause_profiler was already called -Initializing policy object ... -DONE: Initializing policy object. 
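The parse trace above spells out the on-disk layout of the deleted tuner_confs files: an optional first line holding the baseline time (2000 here, per "DEBUG: first_line: 2000"), then one block per configuration delimited by "+++++" and "-----", a header line such as "conf1 3.86 0 79.1 0.0" whose four numeric fields line up with speedup, energy, accuracy, and accuracy loss judging from the conf1 baseline rows, and one line per layer listing the chosen target and approximation knobs ("gpu conv fp16 1 ...", "promise swing_level 7", "gpu conv samp 35 ..."). Below is a minimal sketch of a reader for this format, assuming Python 3 and input already stripped of the leading "-" deletion markers that the diff prepends to every line; the Configuration class and parse_tuner_confs name are illustrative, not part of HPVM.

from dataclasses import dataclass, field

@dataclass
class Configuration:
    name: str
    speedup: float        # relative to the baseline time on the first line
    energy: float
    accuracy: float
    accuracy_loss: float
    layers: dict = field(default_factory=dict)   # layer id -> knob tokens

def parse_tuner_confs(path):
    baseline_ms, confs, current = None, [], None
    with open(path) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            if line.startswith("+++++"):
                current = None                    # a header line follows
            elif line.startswith("-----"):
                confs.append(current)             # configuration block complete
            elif current is None and " " not in line:
                baseline_ms = float(line)         # optional baseline, e.g. "2000"
            elif current is None:
                name, *metrics = line.split()     # "conf1 3.86 0 79.1 0.0"
                current = Configuration(name, *map(float, metrics))
            else:
                layer_id, *knobs = line.split()   # e.g. "4 promise swing_level 3"
                current.layers[int(layer_id)] = knobs
    return baseline_ms, confs

Picking, say, the highest-speedup configuration whose accuracy loss stays under one point is then a one-liner over the parsed list, max((c for c in confs if c.accuracy_loss < 1.0), key=lambda c: c.speedup), which mirrors the kind of ranking the runtime reports at "INFO: Sorting autotuner configurations...".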
-Select target device (0 for CPU, 1 for GPU): DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -INFO: Moving 92928 bytes from host to GPU -INFO: Moving 256 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.295738 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.296727 -INFO: TimeDuration, Event = Add_end, Time = 0.000990 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.296762 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.297676 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000914 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352752.297709 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.301521 -INFO: TimeDuration, Event = Pool_end, Time = 0.003812 -DEBUG: No data movement required - Data on Device -INFO: Moving 1228800 bytes from host to GPU -INFO: Moving 768 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.336723 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.337521 -INFO: TimeDuration, Event = Add_end, Time = 0.000797 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.337549 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.338205 -INFO:
TimeDuration, Event = Tanh_end, Time = 0.000656 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352752.338230 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.341296 -INFO: TimeDuration, Event = Pool_end, Time = 0.003065 -DEBUG: No data movement required - Data on Device -INFO: Moving 2654208 bytes from host to GPU -INFO: Moving 1536 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.368838 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.369336 -INFO: TimeDuration, Event = Add_end, Time = 0.000498 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.369364 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.369713 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000349 -DEBUG: No data movement required - Data on Device -INFO: Moving 3538944 bytes from host to GPU -INFO: Moving 1024 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.387317 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.387641 -INFO: TimeDuration, Event = Add_end, Time = 0.000325 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.387659 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.387893 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -DEBUG: No data movement required - Data on Device -INFO: Moving 2359296 bytes from host to GPU -INFO: Moving 1024 bytes from host to GPU -DEBUG: GPU Configuration for 
ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.409175 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.409515 -INFO: TimeDuration, Event = Add_end, Time = 0.000340 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.409542 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.409789 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000247 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352752.409824 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.412476 -INFO: TimeDuration, Event = Pool_end, Time = 0.002652 -DEBUG: No data movement required - Data on Device -INFO: Moving 163840 bytes from host to GPU -INFO: Moving 40 bytes from host to GPU -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352752.415461 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352752.415614 -INFO: TimeDuration, Event = Mul_end, Time = 0.000153 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.415640 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.415726 -INFO: TimeDuration, Event = Add_end, Time = 0.000086 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352752.415753 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352752.415827 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000073 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx 
to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 106.693921, current iteration energy = 0.000000 - -DEBUG: **** Freeing Output Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.511666 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.512602 -INFO: TimeDuration, Event = Add_end, Time = 0.000935 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.512719 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.513583 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352752.513614 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.517411 -INFO: TimeDuration, Event = Pool_end, Time = 0.003797 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.549701 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.550438 -INFO:
TimeDuration, Event = Add_end, Time = 0.000736 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.550466 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.551137 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000671 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352752.551164 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.554256 -INFO: TimeDuration, Event = Pool_end, Time = 0.003092 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.574404 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.574904 -INFO: TimeDuration, Event = Add_end, Time = 0.000500 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.574933 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.575282 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000349 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352752.590695 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.591018 -INFO: TimeDuration, Event = Add_end, Time = 0.000324 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.591035 -DEBUG: No data movement required - Data on Device -INFO: 
AbsoluteTime, Event = Tanh_end, Time = 1607352752.591267
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000233
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352752.606840
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 256
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.607173
-INFO: TimeDuration, Event = Add_end, Time = 0.000333
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352752.607200
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352752.607445
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000245
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352752.607481
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352752.610131
-INFO: TimeDuration, Event = Pool_end, Time = 0.002650
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352752.610166
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 4096
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352752.610304
-INFO: TimeDuration, Event = Mul_end, Time = 0.000138
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352752.610329
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352752.610363
-INFO: TimeDuration, Event = Add_end, Time = 0.000034
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352752.610389
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352752.610457
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000068
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 81.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 114.706602, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
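For reference, the batch accuracy and per-iteration wall-clock time can be pulled out of a trace like this with a couple of regular expressions. This is a minimal sketch, assuming the deleted log above is saved locally as run.log; parse_iterations and the file name are illustrative, not part of HPVM.

    import re

    def parse_iterations(path="run.log"):
        """Collect (batch accuracy, iteration wall-clock time) pairs from the log."""
        accs, times = [], []
        with open(path) as f:
            for line in f:
                m = re.search(r"\*{6} Accuracy = ([\d.]+)", line)
                if m:
                    accs.append(float(m.group(1)))
                m = re.search(r"current iteration time = ([\d.]+)", line)
                if m:
                    times.append(float(m.group(1)))
        return list(zip(accs, times))

    # e.g. [(81.0, 114.706602), (79.800003, 101.235232), ...]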
[... input copy and per-layer trace for the next batch (ConvLayer x5 with Add/Tanh/Pool, FCLayer GEMM) repeated as above; only timestamps differ ...]
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352752.795892
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352752.796044
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000151
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 101.235232, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
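The stride and allocation lines in the trace above are consistent with a dense NCHW layout: wStride = 1, hStride = w, cStride = h*w, nStride = c*h*w, and size_in_bytes = n*c*h*w * 4 for FP32. A small sanity check against the 500x256x8x8 conv output logged above; the variable names are illustrative only.

    # Dense NCHW strides for the 500x256x8x8 FP32 conv output in the trace
    n, c, h, w = 500, 256, 8, 8
    wStride = 1
    hStride = w * wStride        # 8
    cStride = h * hStride        # 64
    nStride = c * cStride        # 16384
    size_in_bytes = n * nStride * 4
    assert (nStride, cStride, hStride, wStride) == (16384, 64, 8, 1)
    assert size_in_bytes == 32768000  # matches "Attempting to Allocate = 32768000"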
[... input copy and per-layer trace for the next batch repeated as above; only timestamps differ ...]
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352752.976778
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352752.976826
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000049
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 98.809286, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... input copy and per-layer trace for the next batch repeated as above; only timestamps differ ...]
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352753.151572
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352753.151618
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000047
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 97.754588, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
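The FC layer's GEMM shape m = 500, n = 10, k = 4096 logged above also pins down the data flow: 4096 = 256 * 4 * 4 is the flattened 256x4x4 pooled feature map, and the 500x10 FP32 logits occupy exactly the 20000 bytes copied back to the host before the softmax readout. A quick check, with illustrative names only:

    # FC GEMM: [m x k] @ [k x n] -> [m x n]
    m, k, n = 500, 4096, 10      # batch size, flattened features, classes
    assert k == 256 * 4 * 4      # final pool output: 256 channels of 4x4
    out_bytes = m * n * 4        # FP32 logits
    assert out_bytes == 20000    # "Moving 20000 bytes from GPU to host"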
[... input copy and per-layer trace for the next batch repeated as above; only timestamps differ ...]
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352753.331403
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352753.331446
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000043
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.909273, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
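Beyond accuracy, the paired AbsoluteTime/TimeDuration lines allow a per-operator time breakdown (Add_end, Tanh_end, Pool_end, Mul_end, Softmax_end). A minimal aggregator sketch, again assuming the log is saved as run.log; op_time_breakdown is a hypothetical helper, not part of HPVM.

    import re
    from collections import defaultdict

    def op_time_breakdown(path="run.log"):
        """Sum TimeDuration entries per event type across all batches."""
        totals = defaultdict(float)
        with open(path) as f:
            for line in f:
                m = re.search(r"TimeDuration, Event = (\w+), Time = ([\d.]+)", line)
                if m:
                    totals[m.group(1)] += float(m.group(2))
        return dict(totals)  # e.g. {"Tanh_end": ..., "Pool_end": ..., ...}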
[... input copy and per-layer trace for the next batch repeated as above; only timestamps differ ...]
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352753.509260
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352753.509314
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000054
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 80.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 98.303429, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
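The six batch accuracies visible in this trace (81.0, 79.8, 78.0, 82.8, 78.0, 80.4) average to 80.0%. Whether the runtime's final reported accuracy is exactly this mean is an assumption here, since the log is truncated before any end-of-run summary:

    # Mean of the batch accuracies reported above (assumed, not taken from the
    # log's own summary, which is truncated away)
    accs = [81.000000, 79.800003, 78.000000, 82.800003, 78.000000, 80.400002]
    mean_acc = sum(accs) / len(accs)
    print(round(mean_acc, 6))  # 80.000001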
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.636686 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.636704 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.639870 -INFO: TimeDuration, Event = Pool_end, Time = 0.003167 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.657818 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.658287 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.658303 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.658640 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.671528 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.671844 -INFO: TimeDuration, Event = Add_end, Time = 0.000316 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.671861 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.672093 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.686390 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.686698 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.686714 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.686948 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.686969 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.689687 -INFO: TimeDuration, Event = Pool_end, Time = 0.002718 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352753.689712 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352753.689820 -INFO: TimeDuration, Event = Mul_end, Time = 0.000108 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.689836 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.689862 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352753.689879 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352753.689960 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000081 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.346969, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.794679 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.795655 -INFO: TimeDuration, Event = Add_end, Time = 0.000976 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.795675 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.796557 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000882 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.796602 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.800610 -INFO: TimeDuration, Event = Pool_end, Time = 0.004008 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.826689 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.827437 -INFO: TimeDuration, Event = Add_end, Time = 0.000748 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.827455 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.828109 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000654 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.828125 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.831268 -INFO: TimeDuration, Event = Pool_end, Time = 0.003143 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.849227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.849699 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.849715 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.850049 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.862950 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.863262 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.863278 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.863510 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.877841 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.878150 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.878168 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.878401 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.878422 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.881142 -INFO: TimeDuration, Event = Pool_end, Time = 0.002720 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352753.881167 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352753.881274 -INFO: TimeDuration, Event = Mul_end, Time = 0.000107 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.881290 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.881345 -INFO: TimeDuration, Event = Add_end, Time = 0.000055 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352753.881363 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352753.881433 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000070 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 99.107548, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352753.974971 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352753.975903 -INFO: TimeDuration, Event = Add_end, Time = 0.000931 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352753.975921 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352753.976792 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000871 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352753.976813 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352753.979693 -INFO: TimeDuration, Event = Pool_end, Time = 0.002880 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.006940 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.007665 -INFO: TimeDuration, Event = Add_end, Time = 0.000725 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.007682 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.008330 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.008343 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.011535 -INFO: TimeDuration, Event = Pool_end, Time = 0.003192 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.029493 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.029961 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.029979 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.030314 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352754.043182 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.043496 -INFO: TimeDuration, Event = Add_end, Time = 0.000314 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.043512 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.043744 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.058159 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.058471 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.058488 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.058721 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.058742 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.061458 -INFO: TimeDuration, Event = Pool_end, Time = 0.002715 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.061484 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.061591 -INFO: TimeDuration, Event = Mul_end, Time = 0.000108 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.061609 -DEBUG: No data movement required - Data on Device 
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.061634 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.061651 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.061702 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.157637, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.107636 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.108568 -INFO: TimeDuration, Event = Add_end, Time = 0.000932 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.108585 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.109452 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.109470 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.112363 -INFO: TimeDuration, Event = Pool_end, Time = 0.002893 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
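The FCLayer entries in these traces log a GEMM of shape m = 500, n = 10, k = 4096 and then dispatch to cuBLAS ("CuBlasSgemm *"). For reference only, a naive host-side equivalent of that multiply, shown to pin down the row-major indexing (the runtime itself runs SGEMM on device memory; this loop nest is not its implementation):

#include <vector>

// C[m x n] = A[m x k] * B[k x n], row-major -- the shape logged for the
// final fully-connected layer: 500 inputs x 4096 features -> 10 classes.
void gemmRef(const std::vector<float> &A, const std::vector<float> &B,
             std::vector<float> &C, int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p)
        acc += A[i * k + p] * B[p * n + j];
      C[i * n + j] = acc;  // 500 * 10 floats = the 20000-byte output tensor
    }
}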
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.139655 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.140381 -INFO: TimeDuration, Event = Add_end, Time = 0.000726 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.140681 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.141347 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000666 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.141375 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.160179 -INFO: TimeDuration, Event = Pool_end, Time = 0.018804 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.167049 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.167517 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.167534 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.167875 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000341 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.180880 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.181194 -INFO: TimeDuration, Event = Add_end, Time = 0.000314 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.181210 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.181442 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.195792 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.196101 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.196117 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.196350 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.196654 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.200143 -INFO: TimeDuration, Event = Pool_end, Time = 0.003489 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.200169 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.200280 -INFO: TimeDuration, Event = Mul_end, Time = 0.000111 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.200297 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.200642 -INFO: TimeDuration, Event = Add_end, Time = 0.000345 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.200662 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.200715 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000053 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 102.664774, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.245472 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.246401 -INFO: TimeDuration, Event = Add_end, Time = 0.000929 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.246421 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.247286 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.247303 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.250195 -INFO: TimeDuration, Event = Pool_end, Time = 0.002892 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.277832 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.278560 -INFO: TimeDuration, Event = Add_end, Time = 0.000728 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.278577 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.279231 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000654 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.279249 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.282416 -INFO: TimeDuration, Event = Pool_end, Time = 0.003167 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.300418 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.300888 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.300906 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.301247 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000341 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 
- BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.314229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.314541 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.314559 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.314791 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.329134 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.329443 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.329460 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.329695 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.329715 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.332432 -INFO: TimeDuration, Event = Pool_end, Time = 0.002716 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.332452 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes 
= 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.332562 -INFO: TimeDuration, Event = Mul_end, Time = 0.000110 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.332575 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.332599 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.332619 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.332673 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000053 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 97.350046, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.375488 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.376413 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.376426 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.377291 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.377307 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.380205 -INFO: TimeDuration, Event = Pool_end, Time = 0.002898 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.407458 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.408177 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.408190 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.408839 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.408852 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.412049 -INFO: TimeDuration, Event = Pool_end, Time = 0.003197 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.429847 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.430312 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.430325 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352754.430660 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.443295 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.443605 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.443618 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.443848 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.458026 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.458329 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.458342 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.458572 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.458588 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.461320 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.461341 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.461437 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.461452 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.461473 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.461487 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.461529 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
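The stride and allocation figures the runtime prints above fall directly out of a dense fp32 NCHW layout. Below is a minimal sketch (illustrative names, not HPVM's allocator) that reproduces the numbers logged for the first conv output, n = 500, c = 64, h = 32, w = 32:

    #include <cassert>
    #include <cstddef>

    // Dense NCHW strides and byte size for an fp32 tensor, matching the
    // nStride/cStride/hStride/wStride and size_in_bytes lines in the log.
    struct NchwLayout { size_t nStride, cStride, hStride, wStride, bytes; };

    NchwLayout nchwLayout(size_t n, size_t c, size_t h, size_t w,
                          size_t elemBytes = 4 /* fp32 */) {
        NchwLayout l;
        l.wStride = 1;          // contiguous along width
        l.hStride = w;          // step one row
        l.cStride = h * w;      // step one channel plane
        l.nStride = c * h * w;  // step one image
        l.bytes   = n * l.nStride * elemBytes;
        return l;
    }

    int main() {
        NchwLayout l = nchwLayout(500, 64, 32, 32);  // first conv output above
        assert(l.nStride == 65536 && l.cStride == 1024 &&
               l.hStride == 32 && l.wStride == 1);
        assert(l.bytes == 131072000);  // "Attempting to Allocate = 131072000"
        return 0;
    }

The same formula accounts for every allocation in the trace; e.g. the 98304000-byte buffer is 500 x 192 x 16 x 16 fp32 elements with nStride = 49152.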
-INFO: current iteration time = 95.552008, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.503698 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.504626 -INFO: TimeDuration, Event = Add_end, Time = 0.000928 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.504643 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.505512 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000870 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.505526 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.508422 -INFO: TimeDuration, Event = Pool_end, Time = 0.002896 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.535646 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.536364 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.536374 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.537024 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.537035 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.540240 -INFO: TimeDuration, Event = Pool_end, Time = 0.003205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.559030 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.559497 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.559510 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.559849 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.572552 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.572861 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.572874 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.573104 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.587285 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.587589 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.587600 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.587830 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.587845 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.590578 -INFO: TimeDuration, Event = Pool_end, Time = 0.002733 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.590598 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.590695 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.590707 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.590728 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.590743 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.590785 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 
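The FC-layer GEMM dimensions logged here are consistent with the pool output that feeds it: flattening n = 500, c = 256, h = 4, w = 4 gives k = 256 * 4 * 4 = 4096 features per image, m = 500 rows, and n = 10 class scores, so the result holds 5000 fp32 elements, i.e. the 20000 bytes later moved back to the host. A quick check (illustrative, not HPVM code):

    #include <cassert>
    #include <cstddef>

    int main() {
        // Pool output feeding the FC layer, as logged: n=500, c=256, h=4, w=4.
        size_t batch = 500, c = 256, h = 4, w = 4, classes = 10;
        size_t k = c * h * w;               // flattened features per image
        size_t outElems = batch * classes;  // GEMM result: m x n
        assert(k == 4096);                          // "m = 500, n = 10, k = 4096"
        assert(outElems == 5000);                   // "x->num_elems = 5000"
        assert(outElems * sizeof(float) == 20000);  // "Moving 20000 bytes ..."
        return 0;
    }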
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.614554, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.634462 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.635377 -INFO: TimeDuration, Event = Add_end, Time = 0.000915 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.635391 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.636252 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.636267 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.639200 -INFO: TimeDuration, Event = Pool_end, Time = 0.002934 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.666482 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.667205 -INFO: TimeDuration, Event = Add_end, Time = 0.000724 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.667219 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.667867 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.667879 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.671074 -INFO: TimeDuration, Event = Pool_end, Time = 0.003195 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.688862 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.689326 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.689340 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.689672 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.702370 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.702679 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.702691 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.702922 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.717103 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.717406 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.717420 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.717649 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.717664 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.720394 -INFO: TimeDuration, Event = Pool_end, Time = 0.002730 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.720596 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.720694 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.720708 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.720731 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
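Each pooling line pairs the input spatial size (dim1, dim2) with the output (h, w), and throughout the trace the output is exactly half the input: 32 to 16, 16 to 8, 8 to 4. That halving is consistent with 2x2 max pooling at stride 2 (an assumption inferred from the logged sizes, not stated by the runtime):

    #include <cassert>
    #include <cstddef>

    // Output side length of an unpadded pooling window.
    size_t pooledDim(size_t in, size_t window, size_t stride) {
        return (in - window) / stride + 1;
    }

    int main() {
        // The three pool stages in the trace: 32->16, 16->8, 8->4.
        assert(pooledDim(32, 2, 2) == 16);
        assert(pooledDim(16, 2, 2) == 8);
        assert(pooledDim(8, 2, 2) == 4);
        // Pooled tensor bytes for n=500, c=256, h=4, w=4 fp32:
        assert(500u * 256 * 4 * 4 * sizeof(float) == 8192000);
        return 0;
    }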
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.720744 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.720795 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.872673, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.763242 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.764163 -INFO: TimeDuration, Event = Add_end, Time = 0.000920 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.764179 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.765044 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.765059 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.767964 -INFO: TimeDuration, Event = Pool_end, Time = 0.002905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.795206 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.795925 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.795938 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.796635 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000697 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.796652 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.799809 -INFO: TimeDuration, Event = Pool_end, Time = 0.003156 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.817599 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.818066 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.818088 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.818431 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000343 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352754.831092 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.831399 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.831412 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.831639 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.846893 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.847198 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.847212 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.847441 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.847459 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.850184 -INFO: TimeDuration, Event = Pool_end, Time = 0.002725 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.850204 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.850302 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.850344 -DEBUG: No data movement required - Data on Device 
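The per-batch accuracy lines ("batch_dim = 500, num_classes = 10") correspond to an argmax over the 500x10 softmax output copied back to the host, compared against the reference labels. A minimal sketch of that reduction (function name and label format are illustrative; the actual runtime reads its own labels file):

    #include <cstddef>

    // probs is row-major [batchDim x numClasses], as laid out by the
    // 20000-byte host copy made after TensorSoftmax.
    float batchAccuracy(const float* probs, const unsigned* labels,
                        size_t batchDim, size_t numClasses) {
        size_t correct = 0;
        for (size_t i = 0; i < batchDim; ++i) {
            size_t best = 0;
            for (size_t k = 1; k < numClasses; ++k)
                if (probs[i * numClasses + k] > probs[i * numClasses + best])
                    best = k;
            if (best == labels[i]) ++correct;
        }
        return 100.0f * correct / batchDim;  // e.g. "****** Accuracy = 78.000000"
    }

With batchDim = 500, each correct prediction moves the figure in 0.2% steps, which matches the granularity of the accuracies reported across iterations (78.0, 79.4, 79.8, 80.4, 82.8).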
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.850368 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.850382 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.850425 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.703147, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.893594 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.894523 -INFO: TimeDuration, Event = Add_end, Time = 0.000929 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.894539 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.895404 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.895420 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.898329 -INFO: TimeDuration, Event = Pool_end, Time = 0.002909 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.925905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.926626 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.926639 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.927291 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000652 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.927305 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.930479 -INFO: TimeDuration, Event = Pool_end, Time = 0.003175 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.948265 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.948731 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.948748 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.949082 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.961782 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.962090 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.962102 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.962331 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.976549 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.976854 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352754.976868 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352754.977100 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352754.977115 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352754.979838 -INFO: TimeDuration, Event = Pool_end, Time = 0.002723 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352754.979857 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352754.979953 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352754.979965 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352754.979987 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352754.980000 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352754.980047 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.113452, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352755.022861 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352755.023787 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352755.023803 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352755.024666 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352755.024681 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352755.027585 -INFO: TimeDuration, Event = Pool_end, Time = 0.002903 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352755.054901 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352755.055621 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352755.055635 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352755.056284 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352755.056297 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352755.059505 -INFO: TimeDuration, Event = Pool_end, Time = 0.003208 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352755.077280 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352755.077746 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352755.077759 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352755.078098 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000339 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2
[ elided: remainder of this batch's trace — ConvLayers 4 and 5 (FP32 BASELINE, 32768000-byte allocations, TensorAdd/TensorTanh and a final TensorPooling down to n = 500, c = 256, h = 4, w = 4) and the FCLayer (TensorGemmGPU with m = 500, n = 10, k = 4096, CuBlasSgemm, TensorAdd, no activation function, TensorSoftmax moving 20000 bytes from GPU to host), all with AbsoluteTime/TimeDuration event stamps ]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.802193, current iteration energy = 0.000000
-
[ elided: five further batches, each freeing its output tensors, moving 6144000 bytes from host to GPU and re-running the identical FP32 BASELINE trace (five GPU ConvLayers with allocations of 131072000, 98304000, 49152000, 32768000 and 32768000 bytes and matching nStride/cStride/hStride/wStride dumps, then the FCLayer); each ends with "batch_dim = 500, num_classes = 10", the same findNextConfiguration/findTargetConfiguration lines and energy = 0.000000; only the per-batch results differ: ]
-****** Accuracy = 78.599998   (current iteration time = 95.680308)
-****** Accuracy = 76.800003   (current iteration time = 95.674716)
-****** Accuracy = 78.000000   (current iteration time = 95.638865)
-****** Accuracy = 81.000000   (current iteration time = 95.572386)
-****** Accuracy = 79.800003   (current iteration time = 95.544899)
[ elided: the trace of the next batch, identical through its five ConvLayers, cut off inside the FCLayer: ]
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352755.881274
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 4096
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352755.881370 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352755.881383 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352755.881404 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352755.881417 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352755.881466 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.313797, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352755.923658 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352755.924583 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352755.924609 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352755.925472 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352755.925486 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
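The stride and byte counts logged at each allocation are pure functions of the tensor's NCHW shape. As a sanity check, here is a minimal C++ sketch (not the runtime's allocator; the dimensions are taken from the trace above) that reproduces the logged layer-1 and layer-2 values:

```cpp
// Reproduce the logged NCHW strides and FP32 allocation sizes from dims.
#include <cstddef>
#include <cstdio>

struct Dims4 { std::size_t n, c, h, w; };

static void printStrides(Dims4 d) {
  std::size_t wStride = 1;
  std::size_t hStride = d.w;               // one row
  std::size_t cStride = d.h * d.w;         // one feature map
  std::size_t nStride = d.c * d.h * d.w;   // one image
  std::size_t bytes   = d.n * nStride * sizeof(float);  // FP32 elements
  std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, "
              "size_in_bytes = %zu\n",
              nStride, cStride, hStride, wStride, bytes);
}

int main() {
  printStrides({500, 64, 32, 32});   // layer-1 output: 65536/1024/32/1, 131072000 bytes
  printStrides({500, 192, 16, 16});  // layer-2 output: 49152/256/16/1, 98304000 bytes
}
```

The 6144000-byte input batch fits the same rule: 500 x 3 x 32 x 32 x 4 bytes, with nStride = 3072 as logged.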
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.557515, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-[... per-layer trace for the next 500-image batch elided; identical structure, new timestamps ...]
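Every tensor op in the trace is bracketed by an AbsoluteTime stamp at entry and an AbsoluteTime/TimeDuration pair at exit. A sketch of that instrumentation pattern, assuming plain gettimeofday() wall-clock stamps (the event name here is illustrative, not the runtime's profiling API):

```cpp
// Event-timing pattern behind the AbsoluteTime/TimeDuration log lines.
#include <cstdio>
#include <sys/time.h>

static double nowSeconds() {
  timeval tv;
  gettimeofday(&tv, nullptr);
  return tv.tv_sec + tv.tv_usec * 1e-6;  // e.g. 1607352755.827410
}

int main() {
  double start = nowSeconds();
  std::printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", start);
  // ... launch the tensor op and synchronize here ...
  double end = nowSeconds();
  std::printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
  std::printf("INFO: TimeDuration, Event = Add_end, Time = %f\n", end - start);
}
```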
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.929582, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-[... per-layer trace for the next 500-image batch elided; identical structure, new timestamps ...]
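Each TensorPooling line reports the input extents (dim1 x dim2) and the halved output extents (h x w). A short sketch under the assumption of 2x2 pooling with stride 2, which is consistent with every 32 to 16, 16 to 8, and 8 to 4 transition in this trace:

```cpp
// Check the pooling output extents logged in the trace against the
// standard pooling size formula, assuming a 2x2 window with stride 2.
#include <cstdio>

int main() {
  int window = 2, stride = 2;
  int dims[4] = {32, 16, 8, 4};  // extents seen across the three pool layers
  for (int i = 0; i < 3; ++i) {
    int in = dims[i];
    int out = (in - window) / stride + 1;  // standard output-size formula
    std::printf("in = %d -> out = %d (logged: %d)\n", in, out, dims[i + 1]);
  }
}
```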
-batch_dim = 500, num_classes = 10
-****** Accuracy = 80.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.667309, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-[... per-layer trace for the next 500-image batch elided; identical structure, new timestamps ...]
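The FCLayer's TensorGemmGPU call (m = 500, n = 10, k = 4096) multiplies the flattened pooled activations (256 x 4 x 4 = 4096 features per image) by a 4096 x 10 weight matrix, producing the 500 x 10 logits (20000 bytes) that later move to the host. A plain row-major reference GEMM for illustration, not the CuBlasSgemm path the trace actually takes:

```cpp
// Row-major reference for the FC layer's GEMM: y = x * w,
// with x of shape m x k and w of shape k x n.
#include <cstddef>
#include <vector>

std::vector<float> fcForward(const std::vector<float>& x,  // m*k activations
                             const std::vector<float>& w,  // k*n weights
                             int m, int n, int k) {
  std::vector<float> y(static_cast<std::size_t>(m) * n, 0.0f);
  for (int i = 0; i < m; ++i)
    for (int p = 0; p < k; ++p) {
      float xi = x[static_cast<std::size_t>(i) * k + p];
      for (int j = 0; j < n; ++j)
        y[static_cast<std::size_t>(i) * n + j] +=
            xi * w[static_cast<std::size_t>(p) * n + j];
    }
  return y;  // bias add and softmax follow, as in the trace
}
```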
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.802903, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-[... per-layer trace for the next 500-image batch elided; identical structure, new timestamps ...]
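TensorSoftmax normalizes each of the 500 rows of logits before the 20000-byte device-to-host copy. For comparison, a generic numerically stable row softmax; the runtime itself runs this on the GPU, so this loop is only a reference:

```cpp
// Numerically stable softmax over each row of a rows x cols matrix,
// e.g. the 500 x 10 logits produced by the FC layer.
#include <algorithm>
#include <cmath>
#include <cstddef>

void softmaxRows(float* y, int rows, int cols) {
  for (int i = 0; i < rows; ++i) {
    float* row = y + static_cast<std::size_t>(i) * cols;
    float mx = *std::max_element(row, row + cols);
    float sum = 0.0f;
    for (int j = 0; j < cols; ++j) {
      row[j] = std::exp(row[j] - mx);  // subtract the row max for stability
      sum += row[j];
    }
    for (int j = 0; j < cols; ++j) row[j] /= sum;
  }
}
```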
on Device
- [one line is kept per event below; duplicate FP32-baseline allocation blocks, repeated tensor->data_format echoes, "No data movement required - Data on Device" placement checks, and AbsoluteTime stamps are elided]
-DEBUG: ***--- size_in_bytes = 98304000
-DEBUG: Attempting to Allocate = 98304000
-INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1
-INFO: FP32 BASELINE
-INFO: *** TensorAdd
-INFO: x->num_elems = 24576000
-INFO: bias->num_elems = 192
-INFO: TimeDuration, Event = Add_end, Time = 0.000720
-INFO: *** TensorTanh
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000649
-INFO: *** TensorPooling
-DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 24576000
-INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Pool_end, Time = 0.003196
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: ***--- size_in_bytes = 49152000
-INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1
-INFO: FP32 BASELINE
-INFO: *** TensorAdd
-INFO: x->num_elems = 12288000
-INFO: bias->num_elems = 384
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: *** TensorTanh
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000337
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: ***--- size_in_bytes = 32768000
-INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1
-INFO: FP32 BASELINE
-INFO: *** TensorAdd
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 256
-INFO: TimeDuration, Event = Add_end, Time = 0.000319
-INFO: *** TensorTanh
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000232
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: ***--- size_in_bytes = 32768000
-INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1
-INFO: FP32 BASELINE
-INFO: *** TensorAdd
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 256
-INFO: TimeDuration, Event = Add_end, Time = 0.000303
-INFO: *** TensorTanh
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-INFO: *** TensorPooling
-DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 8192000
-INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1
-INFO: TimeDuration, Event = Pool_end, Time = 0.002744
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 4096
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000115
-INFO: *** TensorAdd
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: TimeDuration, Event = Add_end, Time = 0.000026
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000046
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.956891, current iteration energy = 0.000000
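The stride lines in the trace above follow directly from the NCHW layout: w is the fastest-moving dimension, and size_in_bytes is the product of the four dimensions and the 4-byte fp32 element size. A minimal sketch of that arithmetic, using the conv2 activation shape as an example (the helper names are illustrative, not the runtime's API):

    #include <cstdio>

    // Row-major NCHW strides: w is contiguous, then h, then c, then n.
    struct Strides { long n, c, h, w; };

    static Strides nchwStrides(long c, long h, long w) {
        return { c * h * w, h * w, w, 1 };
    }

    int main() {
        long n = 500, c = 192, h = 16, w = 16, elem = 4;  // fp32 conv2 output
        Strides s = nchwStrides(c, h, w);
        // Prints nStride = 49152, cStride = 256, hStride = 16, wStride = 1
        std::printf("nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld\n",
                    s.n, s.c, s.h, s.w);
        // Prints size_in_bytes = 98304000, matching the allocation above.
        std::printf("size_in_bytes = %ld\n", n * c * h * w * elem);
        return 0;
    }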
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-INFO: Moving 6144000 bytes from host to GPU
- [per-layer trace identical in structure to the reconstruction above: conv1 (500x64x32x32, bias 64) -> pool to 16x16, conv2 (500x192x16x16, bias 192) -> pool to 8x8, conv3 (500x384x8x8, bias 384), conv4 (500x256x8x8, bias 256), conv5 (500x256x8x8, bias 256) -> pool to 4x4, FC (m = 500, n = 10, k = 4096), softmax]
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 76.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.417775, current iteration energy = 0.000000
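The FCLayer in each trace is a single SGEMM: m = 500 is the batch, n = 10 the class count, and k = 4096 the flattened 256x4x4 conv5 activation. The CuBlasSgemm line points at cuBLAS underneath; below is a hedged sketch of the standard way a row-major GEMM is mapped onto column-major cuBLAS, since the exact call in the runtime is not shown in this log:

    #include <cublas_v2.h>

    // C (m x n, row-major) = A (m x k) * B (k x n), all row-major on device.
    // Column-major cuBLAS computes C^T = B^T * A^T, so swap the operands and m/n.
    void fcForward(cublasHandle_t handle,
                   const float* A, const float* B, float* C,
                   int m /*500*/, int n /*10*/, int k /*4096*/) {
        const float alpha = 1.0f, beta = 0.0f;
        cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                    n, m, k,
                    &alpha,
                    B, n,   // B^T is n x k with leading dimension n
                    A, k,   // A^T is k x m with leading dimension k
                    &beta,
                    C, n);  // C^T is n x m with leading dimension n
    }

Swapping the operand order yields C^T in column-major storage, which is bit-identical to C in row-major storage, so no explicit transpose kernel is needed.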
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-INFO: Moving 6144000 bytes from host to GPU
- [per-layer trace identical in structure to the invocations above]
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.744015, current iteration energy = 0.000000
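Each op in the trace is bracketed by an AbsoluteTime stamp at entry and at the _end event, with TimeDuration as their difference in seconds; the iteration time above aggregates a whole batch. A minimal sketch of that instrumentation, assuming gettimeofday-style wall-clock stamps (the profiler type is illustrative):

    #include <cstdio>
    #include <sys/time.h>

    static double wallSeconds() {
        timeval tv;
        gettimeofday(&tv, nullptr);
        return tv.tv_sec + tv.tv_usec * 1e-6;  // e.g. 1607352756.993890
    }

    struct EventTimer {
        double start = 0.0;
        void begin(const char* name) {
            start = wallSeconds();
            std::printf("INFO: AbsoluteTime, Event = %s, Time = %f\n", name, start);
        }
        void end(const char* name) {
            double now = wallSeconds();
            std::printf("INFO: AbsoluteTime, Event = %s, Time = %f\n", name, now);
            std::printf("INFO: TimeDuration, Event = %s, Time = %f\n", name, now - start);
        }
    };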
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-INFO: Moving 6144000 bytes from host to GPU
- [per-layer trace identical in structure to the invocations above]
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 81.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 100.045589, current iteration energy = 0.000000
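Every Accuracy line is derived from the 500x10 softmax output copied back to the host (the 20000-byte transfer above): take the argmax over the 10 class scores in each batch row, compare with the label, and scale to a percentage. A sketch under those assumptions; the label buffer layout is not shown in the log:

    #include <cstdio>

    // probs: batch_dim x num_classes row-major host buffer (the 20000 bytes
    // moved from GPU to host above); labels: one class id per batch row.
    float batchAccuracy(const float* probs, const int* labels,
                        int batch_dim /*500*/, int num_classes /*10*/) {
        int correct = 0;
        for (int i = 0; i < batch_dim; ++i) {
            const float* row = probs + i * num_classes;
            int best = 0;
            for (int c = 1; c < num_classes; ++c)
                if (row[c] > row[best]) best = c;
            if (best == labels[i]) ++correct;
        }
        return 100.0f * correct / batch_dim;  // e.g. ****** Accuracy = 81.000000
    }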
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-INFO: Moving 6144000 bytes from host to GPU
- [per-layer trace identical in structure to the invocations above]
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 97.002266, current iteration energy = 0.000000
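The alternating "No data movement required - Data on Device" and "Moving N bytes ..." lines indicate a placement check before each op: a copy is issued only when a tensor is not already resident where the op needs it. A hedged sketch of such a guard; the placement enum and field names are assumptions, not the runtime's actual types:

    #include <cstdio>
    #include <cuda_runtime.h>

    enum class Placement { Host, Device };

    struct TensorBuf {
        void* host_ptr;
        void* dev_ptr;
        size_t size_in_bytes;
        Placement placement;
    };

    // Ensure the tensor is resident on the device before launching a kernel.
    void requireOnDevice(TensorBuf& t) {
        if (t.placement == Placement::Device) {
            std::printf("DEBUG: No data movement required - Data on Device\n");
            return;
        }
        std::printf("INFO: Moving %zu bytes from host to GPU\n", t.size_in_bytes);
        cudaMemcpy(t.dev_ptr, t.host_ptr, t.size_in_bytes, cudaMemcpyHostToDevice);
        t.placement = Placement::Device;
    }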
to Allocate = 98304000
- [remainder of this invocation: conv2 (500x192x16x16, bias 192) -> pool to 8x8, conv3 (500x384x8x8, bias 384), conv4 (500x256x8x8, bias 256), conv5 (500x256x8x8, bias 256) -> pool to 4x4, FC (m = 500, n = 10, k = 4096), softmax]
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.436146, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
- [next invocation: conv1 (500x64x32x32, bias 64), add, tanh, then pool]
-INFO: *** TensorPooling
-DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 32768000
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Pool_end, Time = 0.002902
[... the trace continues in the same format for the rest of this batch: conv2 (output 500x192x16x16, 98304000 bytes, bias 192, tanh, pool to 500x192x8x8), conv3 (output 500x384x8x8, bias 384, tanh), conv4 (output 500x256x8x8, bias 256, tanh), and conv5 (output 500x256x8x8, bias 256, tanh, pool to 500x256x4x4); each conv also allocates a second FP32 BASELINE copy of its output ...]
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352757.306322
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 4096
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
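The FC layer is a single sgemm: m = 500 rows of k = 4096 features against an n = 10 column weight matrix, so the freshly created output tensor is 500*10*4 = 20000 bytes, exactly the allocation logged above. A naive CPU sketch of the same computation, under the assumption of row-major buffers; in the trace itself the multiply is dispatched to cuBLAS (the CuBlasSgemm line that follows):

```cpp
#include <cstdio>
#include <vector>

// Naive CPU sketch of the logged FC GEMM: C[m][n] = A[m][k] * B[k][n]
// with m = 500, n = 10, k = 4096. Values are placeholders.
int main() {
  const int m = 500, n = 10, k = 4096;
  std::vector<float> A(m * k, 0.01f), B(k * n, 0.02f), C(m * n, 0.0f);
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p)
        acc += A[i * k + p] * B[p * n + j];
      C[i * n + j] = acc;
    }
  // Output allocation matches the log: 500 * 10 * sizeof(float) = 20000 bytes.
  printf("output bytes = %zu\n", C.size() * sizeof(float));
}
```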
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352757.306418
-INFO: TimeDuration, Event = Mul_end, Time = 0.000096
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352757.306431
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352757.306453
-INFO: TimeDuration, Event = Add_end, Time = 0.000022
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352757.306466
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352757.306516
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000050
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.622714, current iteration energy = 0.000000
-
[... the output-tensor freeing and the conv1 trace for the next batch repeat here in the same format ...]
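Each "****** Accuracy" line is a top-1 score over the 500-image batch: the 20000-byte softmax output is copied to the host and each row's argmax is compared against the golden label. A minimal sketch under that assumption; probs and labels are hypothetical stand-ins for the copied-back output and the reference labels:

```cpp
#include <cstdio>
#include <vector>

// Minimal top-1 accuracy sketch matching the logged report
// (batch_dim = 500, num_classes = 10). Not the runtime's actual code.
float computeAccuracy(const std::vector<float> &probs,
                      const std::vector<int> &labels,
                      int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    int best = 0; // argmax over this row of the softmax output
    for (int c = 1; c < num_classes; ++c)
      if (probs[i * num_classes + c] > probs[i * num_classes + best])
        best = c;
    if (best == labels[i])
      ++correct;
  }
  return 100.0f * correct / batch_dim;
}

int main() {
  std::vector<float> probs(500 * 10, 0.1f); // placeholder softmax output
  std::vector<int> labels(500, 0);          // placeholder golden labels
  printf("****** Accuracy = %f\n", computeAccuracy(probs, labels, 500, 10));
}
```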
[... the identical per-layer trace then repeats for each remaining batch; only the measured results change: ...]
-****** Accuracy = 78.000000
-INFO: current iteration time = 97.798136, current iteration energy = 0.000000
-****** Accuracy = 80.400002
-INFO: current iteration time = 99.034892, current iteration energy = 0.000000
-****** Accuracy = 79.400002
-INFO: current iteration time = 95.642825, current iteration energy = 0.000000
-****** Accuracy = 78.599998
-INFO: current iteration time = 95.374752, current iteration energy = 0.000000
[... the hunk is cut off partway through the final batch's FC-layer TensorAdd ...]
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352757.955242 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352757.955258 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352757.955300 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.403262, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.001786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.002735 -INFO: TimeDuration, Event = Add_end, Time = 0.000949 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.002774 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.003668 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000894 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.003690 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.008584 -INFO: TimeDuration, Event = Pool_end, Time = 0.004894 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
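The nStride/cStride/hStride/wStride and size_in_bytes values that repeat throughout this trace are consistent with a dense, row-major NCHW fp32 layout. Below is a minimal sketch of that relationship, assuming element-count strides and 4-byte floats; the function name is illustrative and not part of the runtime:

    def nchw_strides(n, c, h, w):
        # Dense row-major NCHW: the innermost (w) dimension is contiguous.
        return (c * h * w, h * w, w, 1)

    # Conv1 output in the trace above: 500 x 64 x 32 x 32
    assert nchw_strides(500, 64, 32, 32) == (65536, 1024, 32, 1)
    # After 2x2 max pooling: 500 x 64 x 16 x 16
    assert nchw_strides(500, 64, 16, 16) == (16384, 256, 16, 1)
    # fp32 buffer size matches "Attempting to Allocate = 131072000"
    assert 500 * 64 * 32 * 32 * 4 == 131072000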
-[... per-layer ConvLayer/TensorAdd/TensorTanh/TensorPooling and FCLayer/TensorSoftmax records identical in structure to the iteration shown above; only the timestamps and durations differ ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.844215, current iteration energy = 0.000000
-
-[... full per-layer trace of the next iteration, structurally identical; only the timestamps and durations differ ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 81.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.683241, current iteration energy = 0.000000
-
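Each iteration ends by copying the 500 x 10 softmax output back to the host (20000 bytes = 500 * 10 * 4) and printing a top-1 accuracy over the batch. Below is a sketch of that final step, assuming argmax-vs-label scoring; numpy and the random inputs stand in for the runtime's host-side code and real data:

    import numpy as np

    def top1_accuracy(probs, labels):
        # probs: (batch_dim, num_classes) softmax scores; labels: (batch_dim,)
        return 100.0 * float(np.mean(np.argmax(probs, axis=1) == labels))

    probs = np.random.rand(500, 10).astype(np.float32)  # batch_dim = 500, num_classes = 10
    labels = np.random.randint(0, 10, size=500)
    print("****** Accuracy = %f" % top1_accuracy(probs, labels))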
-[... full per-layer trace, structurally identical to the iteration shown above; only the timestamps and durations differ ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 99.827574, current iteration energy = 0.000000
-
-[... full per-layer trace of the next iteration, structurally identical; only the timestamps and durations differ ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 98.179712, current iteration energy = 0.000000
-
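The FCLayer records show a single SGEMM with m = 500, n = 10, k = 4096: the pooled 500 x 256 x 4 x 4 activations are flattened to 500 x 4096 (256 * 4 * 4 = 4096 features per image) and multiplied by the fully-connected weights before the 10-element bias add. Below is a numpy sketch of those shapes; the weight orientation is an assumption, and the weights and bias are random stand-ins:

    import numpy as np

    x = np.random.rand(500, 256, 4, 4).astype(np.float32)  # pooled activations (8192000 bytes)
    w = np.random.rand(4096, 10).astype(np.float32)        # FC weights: k = 4096, n = 10
    b = np.random.rand(10).astype(np.float32)              # bias->num_elems = 10

    logits = x.reshape(500, -1) @ w + b                    # m = 500 rows, 10 classes
    assert logits.nbytes == 20000                          # "Moving 20000 bytes from GPU to host"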
-[... full per-layer trace, structurally identical to the iteration shown above; only the timestamps and durations differ ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.497216, current iteration energy = 0.000000
-
-[... the next iteration begins with the same input copy and ConvLayer 1 records, then ConvLayer 2 ...]
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 98304000
-DEBUG: Attempting
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.703933 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.704653 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.704667 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.705316 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.705329 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.708524 -INFO: TimeDuration, Event = Pool_end, Time = 0.003195 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.726305 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.726769 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.726781 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.727115 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352758.739763 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.740070 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.740081 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.740328 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000247 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.754493 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.754796 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.754809 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.755040 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.755057 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.757787 -INFO: TimeDuration, Event = Pool_end, Time = 0.002729 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352758.757807 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352758.757904 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.757918 -DEBUG: No data movement required - Data on Device 
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.757939 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352758.757952 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352758.758002 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.552111, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.800342 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.801262 -INFO: TimeDuration, Event = Add_end, Time = 0.000920 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.801277 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.802138 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000860 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.802153 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.805057 -INFO: TimeDuration, Event = Pool_end, Time = 0.002904 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.832298 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.833026 -INFO: TimeDuration, Event = Add_end, Time = 0.000728 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.833041 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.833688 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.833701 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.836897 -INFO: TimeDuration, Event = Pool_end, Time = 0.003195 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.854673 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.855138 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.855151 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.855483 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.868164 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.868471 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.868583 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.868809 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000226 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.882900 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.883205 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.883217 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.883446 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.883462 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.886198 -INFO: TimeDuration, Event = Pool_end, Time = 0.002736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352758.886218 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352758.886320 -INFO: TimeDuration, Event = Mul_end, Time = 0.000102 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.886333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.886355 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352758.886368 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352758.886418 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.516562, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.928252 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.929176 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.929191 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.930056 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.930070 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.932973 -INFO: TimeDuration, Event = Pool_end, Time = 0.002903 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.960398 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.961126 -INFO: TimeDuration, Event = Add_end, Time = 0.000728 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.961141 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.961790 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352758.961802 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352758.965109 -INFO: TimeDuration, Event = Pool_end, Time = 0.003307 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.982725 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.983192 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.983205 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.983541 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352758.996250 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352758.996558 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352758.996593 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352758.996821 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.010957 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.011262 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.011274 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.011504 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.011521 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.014258 -INFO: TimeDuration, Event = Pool_end, Time = 0.002737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.014278 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.014375 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.014388 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.014410 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.014423 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.014466 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.708803, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.056509 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.057436 -INFO: TimeDuration, Event = Add_end, Time = 0.000927 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.057467 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.058329 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000862 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.058344 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.061224 -INFO: TimeDuration, Event = Pool_end, Time = 0.002880 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.088534 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.089255 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.089268 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.089918 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.089930 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.093120 -INFO: TimeDuration, Event = Pool_end, Time = 0.003190 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.110885 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.111348 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.111361 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352759.111697 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.124379 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.124687 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.124701 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.124931 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.139108 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.139411 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.139423 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.139655 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.139671 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.142408 -INFO: TimeDuration, Event = Pool_end, Time = 0.002737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.142428 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.142527 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.142539 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.142560 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.142574 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.142617 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 95.650631, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.184643 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.185564 -INFO: TimeDuration, Event = Add_end, Time = 0.000921 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.185580 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.186438 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000858 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.186453 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.189360 -INFO: TimeDuration, Event = Pool_end, Time = 0.002907 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.216627 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.217345 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.217358 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.218005 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.218016 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.221217 -INFO: TimeDuration, Event = Pool_end, Time = 0.003201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.239015 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.239494 -INFO: TimeDuration, Event = Add_end, Time = 0.000479 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.239507 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.239841 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.252523 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.252829 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.252841 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.253069 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.267229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.267530 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.267544 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.267773 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.267790 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.270530 -INFO: TimeDuration, Event = Pool_end, Time = 0.002740 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.270551 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.270648 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.270661 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.270682 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.270695 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.270738 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.655380, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.313535 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.314463 -INFO: TimeDuration, Event = Add_end, Time = 0.000928 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.314479 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.315347 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000868 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.315361 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.318253 -INFO: TimeDuration, Event = Pool_end, Time = 0.002891 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.345483 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.346202 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.346214 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.346863 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.346875 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.350076 -INFO: TimeDuration, Event = Pool_end, Time = 0.003201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.367863 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.368326 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.368336 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.368670 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.381367 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.381676 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.381694 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.381924 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.396074 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.396658 -INFO: TimeDuration, Event = Add_end, Time = 0.000584 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.396676 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.396906 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.396923 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.399368 -INFO: TimeDuration, Event = Pool_end, Time = 0.002445 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.399387 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.399485 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.399498 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.399519 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.399533 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.399582 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.624122, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.442100 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.443021 -INFO: TimeDuration, Event = Add_end, Time = 0.000921 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.443037 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.443901 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.443914 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.446826 -INFO: TimeDuration, Event = Pool_end, Time = 0.002911 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.474059 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.474778 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.474792 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.475440 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.475452 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.478650 -INFO: TimeDuration, Event = Pool_end, Time = 0.003198 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.496423 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.496887 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.496900 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.497234 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352759.509940 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.510248 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.510261 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.510500 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000239 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.524690 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.524993 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.525006 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.525235 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.525251 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.527980 -INFO: TimeDuration, Event = Pool_end, Time = 0.002729 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.527998 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.528093 -INFO: TimeDuration, Event = Mul_end, Time = 0.000095 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.528106 -DEBUG: No data movement required - Data on Device 
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.528128 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.528141 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.528190 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.633442, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.570802 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.571727 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.571743 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.572604 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.572628 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.575521 -INFO: TimeDuration, Event = Pool_end, Time = 0.002893 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.602782 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.603501 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.603515 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.604166 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.604178 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.607379 -INFO: TimeDuration, Event = Pool_end, Time = 0.003201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.625171 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.625637 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.625658 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.626000 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000342 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.638644 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.638953 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.638966 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.639195 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.654444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.654750 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.654763 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.654995 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.655011 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.657735 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.657755 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.657853 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.657867 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.657888 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.657902 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.657946 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.670346, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.701064 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.701984 -INFO: TimeDuration, Event = Add_end, Time = 0.000920 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.702001 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.702864 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.702878 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.705818 -INFO: TimeDuration, Event = Pool_end, Time = 0.002941 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.733020 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.733742 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.733757 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.734403 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.734414 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.737612 -INFO: TimeDuration, Event = Pool_end, Time = 0.003197 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.755384 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.755849 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.755862 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.756197 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.768889 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.769195 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.769208 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.769436 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.783589 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.783892 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.783906 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.784136 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.784153 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.786884 -INFO: TimeDuration, Event = Pool_end, Time = 0.002731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.786904 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.787002 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.787015 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.787036 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.787050 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.787098 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.650452, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.829393 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.830316 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.830329 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.831194 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.831208 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.834110 -INFO: TimeDuration, Event = Pool_end, Time = 0.002901 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.861353 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.862070 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.862092 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.862741 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.862752 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.865950 -INFO: TimeDuration, Event = Pool_end, Time = 0.003198 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.883743 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.884208 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.884222 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352759.884556 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.897253 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.897563 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.897575 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.897807 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.911957 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.912259 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.912272 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.912500 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.912592 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.915252 -INFO: TimeDuration, Event = Pool_end, Time = 0.002660 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352759.915291 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352759.915388 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.915402 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.915423 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352759.915437 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352759.915482 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 95.731406, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.957877 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.958799 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.958814 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.959678 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.959692 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.962598 -INFO: TimeDuration, Event = Pool_end, Time = 0.002905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352759.989875 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352759.990595 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352759.990607 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352759.991253 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352759.991266 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352759.994465 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.012212 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.012676 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.012689 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.013023 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.025714 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.026023 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.026035 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.026264 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.040480 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.040784 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.040797 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.041028 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.041045 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.043769 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.043788 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.043884 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.043896 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.043918 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.043932 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.043974 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 
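The nStride/cStride/hStride/wStride and size_in_bytes values that recur throughout this trace are mutually consistent: they match a row-major NCHW layout with 4-byte elements, which fits the "FP32 BASELINE" lines printed for each layer. A minimal Python sketch reproducing the logged figures — the helper names here are illustrative, not part of the HPVM runtime:

    # Illustrative only: reproduce the stride/allocation figures in this
    # log, assuming row-major NCHW layout and 4-byte fp32 elements.
    def nchw_strides(c, h, w):
        # Innermost w varies fastest; n is the outermost dimension.
        return (c * h * w, h * w, w, 1)  # (nStride, cStride, hStride, wStride)

    def alloc_bytes(n, c, h, w, elem_bytes=4):
        return n * c * h * w * elem_bytes

    # Input batch: 500 x 3 x 32 x 32 -> "Moving 6144000 bytes from host to GPU"
    assert alloc_bytes(500, 3, 32, 32) == 6144000
    assert nchw_strides(3, 32, 32) == (3072, 1024, 32, 1)
    # First conv output: 500 x 64 x 32 x 32
    assert alloc_bytes(500, 64, 32, 32) == 131072000
    assert nchw_strides(64, 32, 32) == (65536, 1024, 32, 1)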
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.760158, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.086103 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.087022 -INFO: TimeDuration, Event = Add_end, Time = 0.000919 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.087037 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.087901 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.087915 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.090826 -INFO: TimeDuration, Event = Pool_end, Time = 0.002910 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.118069 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.118788 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.118801 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.119448 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.119459 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.122666 -INFO: TimeDuration, Event = Pool_end, Time = 0.003206 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.140444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.140907 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.140919 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.141254 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.153946 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.154253 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.154265 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.154494 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.168688 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.168990 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.169002 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.169230 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.169246 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.171978 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.171996 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.172091 -INFO: TimeDuration, Event = Mul_end, Time = 0.000095 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.172105 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.172126 -INFO: TimeDuration, Event = Add_end, Time = 0.000020 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
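Each pool_max step in the trace halves the spatial dimensions (32 -> 16 -> 8 -> 4) while n and c are unchanged. The logged shapes are consistent with 2x2 max pooling at stride 2, though the window size itself is never printed; the sketch below assumes it:

    # Sketch; the 2x2/stride-2 window is inferred from the logged shapes,
    # not printed by the runtime.
    def pooled_hw(h, w, window=2, stride=2):
        return ((h - window) // stride + 1, (w - window) // stride + 1)

    assert pooled_hw(32, 32) == (16, 16)  # pool after conv1 (c = 64)
    assert pooled_hw(16, 16) == (8, 8)    # pool after conv2 (c = 192)
    assert pooled_hw(8, 8) == (4, 4)      # pool after conv5 (c = 256)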
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.172139 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.172181 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.612675, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.214361 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.215290 -INFO: TimeDuration, Event = Add_end, Time = 0.000929 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.215305 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.216170 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.216184 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.219080 -INFO: TimeDuration, Event = Pool_end, Time = 0.002896 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.246349 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.247072 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.247092 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.247749 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000657 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.247768 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.250938 -INFO: TimeDuration, Event = Pool_end, Time = 0.003170 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.268710 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.269174 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.269187 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.269519 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000331 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352760.282214 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.282524 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.282535 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.282765 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.296950 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.297254 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.297265 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.297494 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.297510 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.300241 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.300262 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.300358 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.300368 -DEBUG: No data movement required - Data on Device 
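The FCLayer GEMM dimensions follow directly from the preceding shapes: the last pooled tensor (500 x 256 x 4 x 4) is flattened to a 500 x 4096 matrix and multiplied against a 4096 x 10 weight matrix, giving the logged m = 500, n = 10, k = 4096 and the 20000-byte output. Worked out (illustrative arithmetic, not runtime code):

    # The GEMM shape printed by TensorGemmGPU, derived from the trace.
    batch, c, h, w = 500, 256, 4, 4
    num_classes = 10
    m, k, n = batch, c * h * w, num_classes
    assert (m, n, k) == (500, 10, 4096)
    # Output tensor: m * n fp32 values = 20000 bytes, matching the
    # "Attempting to Allocate = 20000" lines above.
    assert m * n * 4 == 20000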
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.300389 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.300399 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.300442 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.605327, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.343389 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.344332 -INFO: TimeDuration, Event = Add_end, Time = 0.000943 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.344664 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.345533 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000869 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.345547 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.349289 -INFO: TimeDuration, Event = Pool_end, Time = 0.003742 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.375353 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.376078 -INFO: TimeDuration, Event = Add_end, Time = 0.000725 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.376091 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.376740 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.376754 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.379956 -INFO: TimeDuration, Event = Pool_end, Time = 0.003202 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.397855 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.398321 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.398333 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.398668 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.411405 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.411712 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.411724 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.411955 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.426169 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.426474 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.426487 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.426717 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.426734 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.429471 -INFO: TimeDuration, Event = Pool_end, Time = 0.002737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.429492 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.429593 -INFO: TimeDuration, Event = Mul_end, Time = 0.000101 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.429607 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.429631 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.429645 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.429690 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.964809, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.471691 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.472638 -INFO: TimeDuration, Event = Add_end, Time = 0.000947 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.472655 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.473520 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.473534 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.476413 -INFO: TimeDuration, Event = Pool_end, Time = 0.002879 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.503655 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.504374 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.504591 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.505241 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.505253 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.508262 -INFO: TimeDuration, Event = Pool_end, Time = 0.003009 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.526026 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.526495 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.526516 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.526861 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000344 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.539466 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.539774 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.539788 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.540018 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.554196 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.554500 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.554513 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.554743 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.554758 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.557497 -INFO: TimeDuration, Event = Pool_end, Time = 0.002738 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.557518 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.557618 -INFO: TimeDuration, Event = Mul_end, Time = 0.000100 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.557633 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.557655 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.557668 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.557729 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000061 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.478130, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.611538 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.612464 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.612477 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.613342 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.613356 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.616262 -INFO: TimeDuration, Event = Pool_end, Time = 0.002906 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.643477 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.644198 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.644211 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.644861 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.644873 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.648069 -INFO: TimeDuration, Event = Pool_end, Time = 0.003195 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.665827 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.666291 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.666303 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352760.666638 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.679269 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.679577 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.679589 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.679819 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.693983 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.694288 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.694301 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.694530 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.694546 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.697275 -INFO: TimeDuration, Event = Pool_end, Time = 0.002730 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.697294 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.697391 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.697405 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.697426 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.697440 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.697483 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
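The per-iteration accuracies printed in this trace (78.0, 80.4, 79.4, 78.6, 76.8, 78.0) are top-1 percentages over the 500-image batch, so each corresponds to a whole number of correct predictions; trailing digits such as 80.400002 are single-precision rounding of an exact ratio. A quick check — the helper name is illustrative, not a runtime API:

    # Illustrative: accuracy as percent of correct top-1 predictions
    # over the batch_dim = 500 batch reported in the log.
    def batch_accuracy_pct(num_correct, batch_dim=500):
        return 100.0 * num_correct / batch_dim

    assert batch_accuracy_pct(390) == 78.0  # "Accuracy = 78.000000"
    assert batch_accuracy_pct(402) == 80.4  # "Accuracy = 80.400002" (fp32)
    assert batch_accuracy_pct(384) == 76.8  # "Accuracy = 76.800003" (fp32)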
-INFO: current iteration time = 95.492416, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.740007 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.740931 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.740947 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.741813 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.741829 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.744728 -INFO: TimeDuration, Event = Pool_end, Time = 0.002899 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.771864 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.772583 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.772608 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.773256 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.773268 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.776656 -INFO: TimeDuration, Event = Pool_end, Time = 0.003388 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.794227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.794692 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.794704 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.795038 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.807628 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.807936 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.807948 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.808176 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.822348 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.822653 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.822665 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.822895 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.822911 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.825643 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.825663 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.825762 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.825775 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.825797 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.825810 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.825854 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 
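Each FCLayer block logs m = 500, n = 10, k = 4096 before the CuBlasSgemm line: k is the flattened pooled conv output, 256 channels * 4 * 4 = 4096 features per image, and n = 10 is the class count. Since cuBLAS works on column-major storage while these tensors are row-major, the conventional way to get a row-major C = A * B is to compute C^T = B^T * A^T, swapping the operands and passing the dimensions as (n, m, k). A sketch under those assumptions; the wrapper and its names are illustrative, not HPVM's actual call:

#include <cublas_v2.h>

// Row-major C[m x n] = A[m x k] * B[k x n] on top of column-major
// cuBLAS: compute C^T = B^T * A^T by swapping the operands and
// passing the dimensions as (n, m, k). All pointers are device
// pointers; this wrapper is illustrative only.
void fc_forward(cublasHandle_t handle,
                const float *A, // m x k pooled activations, row-major
                const float *B, // k x n weights, row-major
                float *C,       // m x n logits, row-major
                int m, int n, int k) {
  const float alpha = 1.0f, beta = 0.0f;
  cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
              n, m, k,     // shape of C^T: (10, 500) for the logged call
              &alpha,
              B, n,        // row-major B reads as column-major B^T (n x k)
              A, k,        // row-major A reads as column-major A^T (k x m)
              &beta,
              C, n);       // column-major C^T (n x m) == row-major C
}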
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.341568, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.867975 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.868898 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.868915 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.869778 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.869792 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.872696 -INFO: TimeDuration, Event = Pool_end, Time = 0.002904 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.899814 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.900536 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.900546 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.901195 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.901206 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.904403 -INFO: TimeDuration, Event = Pool_end, Time = 0.003197 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.922185 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.922649 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.922662 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.922997 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.935595 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.935901 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.935915 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.936143 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.950314 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.950620 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.950632 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.950864 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.950880 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352760.953606 -INFO: TimeDuration, Event = Pool_end, Time = 0.002727 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352760.953626 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352760.953723 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.953737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.953758 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
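The recurring nStride/cStride/hStride/wStride and size_in_bytes lines are consistent with a dense NCHW layout over 4-byte FP32 elements: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N*C*H*W*4. For the (500, 256, 8, 8) conv output above this gives 16384/64/8/1 and 32768000 bytes, matching the log exactly. A small self-contained check, with illustrative variable names:

#include <cstdio>

// Dense NCHW strides and allocation size for FP32, checked against
// the logged (500, 256, 8, 8) conv output.
int main() {
  unsigned long n = 500, c = 256, h = 8, w = 8;
  unsigned long wStride = 1;
  unsigned long hStride = w;                      // 8
  unsigned long cStride = h * w;                  // 64
  unsigned long nStride = c * h * w;              // 16384
  unsigned long size_in_bytes = n * nStride * 4;  // FP32 -> 32768000
  std::printf("nStride = %lu, cStride = %lu, hStride = %lu, wStride = %lu\n",
              nStride, cStride, hStride, wStride);
  std::printf("size_in_bytes = %lu\n", size_in_bytes);
}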
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352760.953771 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352760.953813 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.333172, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352760.996164 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352760.997090 -INFO: TimeDuration, Event = Add_end, Time = 0.000926 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352760.997105 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352760.997971 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352760.997986 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.000902 -INFO: TimeDuration, Event = Pool_end, Time = 0.002917 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.028030 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.028750 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.028764 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.029415 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.029427 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.032646 -INFO: TimeDuration, Event = Pool_end, Time = 0.003219 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.052561 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.053027 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.053041 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.053375 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352761.066139 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.066450 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.066462 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.066705 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000243 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.081946 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.082252 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.082264 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.082493 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.082509 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.085244 -INFO: TimeDuration, Event = Pool_end, Time = 0.002735 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.085264 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.085362 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.085375 -DEBUG: No data movement required - Data on Device 
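The pool_max entries log both the output shape and the input extents, e.g. n = 500, c = 256, h = 4, w = 4 with dim1 = 8, dim2 = 8: each pooling step halves the spatial extent, so the three pool_max layers take 32 -> 16 -> 8 -> 4. That is consistent with a 2x2 window, stride 2, and no padding, though those parameters are inferred from the logged shapes rather than read from the runtime:

#include <cstdio>

// Pooled output extent for a window/stride/padding combination. The
// 2x2, stride-2, zero-padding values are inferred from the logged
// shapes (dim1/dim2 are the input extents, h/w the outputs).
static int pooled_extent(int in, int window, int stride, int pad) {
  return (in + 2 * pad - window) / stride + 1;
}

int main() {
  const int inputs[] = {32, 16, 8};        // the three pool_max layers
  for (int in : inputs)
    std::printf("%d -> %d\n", in, pooled_extent(in, 2, 2, 0)); // 16, 8, 4
}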
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.085397 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.085410 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.085452 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.885223, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.129069 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.130013 -INFO: TimeDuration, Event = Add_end, Time = 0.000943 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.130038 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.130914 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000877 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.130935 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.134822 -INFO: TimeDuration, Event = Pool_end, Time = 0.003887 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.161210 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.161953 -INFO: TimeDuration, Event = Add_end, Time = 0.000743 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.161968 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.162615 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.162627 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.165798 -INFO: TimeDuration, Event = Pool_end, Time = 0.003171 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.183598 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.184065 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.184078 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.184414 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.197187 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.197497 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.197510 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.197739 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.213855 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.214162 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.214176 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.214407 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.214424 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.217135 -INFO: TimeDuration, Event = Pool_end, Time = 0.002711 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.217155 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.217254 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.217267 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.217288 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.217302 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.217345 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 97.779466, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.259639 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.260561 -INFO: TimeDuration, Event = Add_end, Time = 0.000923 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.260574 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.261440 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.261455 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.264356 -INFO: TimeDuration, Event = Pool_end, Time = 0.002900 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.291589 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.292318 -INFO: TimeDuration, Event = Add_end, Time = 0.000729 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.292336 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.292997 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000661 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.293018 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.296164 -INFO: TimeDuration, Event = Pool_end, Time = 0.003146 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.313928 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.314394 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.314407 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.314743 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.327338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.327646 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.327658 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.327889 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.342063 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.342369 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.342382 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.342615 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.342632 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.345355 -INFO: TimeDuration, Event = Pool_end, Time = 0.002722 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.345374 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.345471 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.345484 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.345505 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.345518 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.345563 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.431170, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.389037 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.389957 -INFO: TimeDuration, Event = Add_end, Time = 0.000921 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.389972 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.390838 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.390851 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.393766 -INFO: TimeDuration, Event = Pool_end, Time = 0.002915 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.420887 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.421608 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.421622 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.422271 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.422282 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.425480 -INFO: TimeDuration, Event = Pool_end, Time = 0.003198 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.443255 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.443720 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.443732 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352761.444069 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.456730 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.457038 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.457051 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.457280 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.471451 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.471754 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.471768 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.471998 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.472014 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.474745 -INFO: TimeDuration, Event = Pool_end, Time = 0.002731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.474764 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.474860 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.474873 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.474894 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.474907 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.474956 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 95.794673, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.517101 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.518026 -INFO: TimeDuration, Event = Add_end, Time = 0.000926 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.518040 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.518902 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000862 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.518916 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.521818 -INFO: TimeDuration, Event = Pool_end, Time = 0.002902 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.548998 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.549721 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.549734 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.550384 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.550396 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.553612 -INFO: TimeDuration, Event = Pool_end, Time = 0.003217 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.574765 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.575248 -INFO: TimeDuration, Event = Add_end, Time = 0.000484 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.575260 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.575592 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.588361 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.588669 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.588683 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.588913 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.604190 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.604496 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.604584 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.604811 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.604830 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.607482 -INFO: TimeDuration, Event = Pool_end, Time = 0.002652 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.607501 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.607598 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.607610 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.607632 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.607644 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.607694 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 100.128707, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.651533 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.652455 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.652477 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.653349 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000872 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.653373 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.657295 -INFO: TimeDuration, Event = Pool_end, Time = 0.003923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.683703 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.684425 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.684596 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.685245 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.685258 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.688293 -INFO: TimeDuration, Event = Pool_end, Time = 0.003036 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.706096 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.706561 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.706596 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.706929 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.723491 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.723804 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000313 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.723819 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.724052 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.738210 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.738512 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.738526 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.738753 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.738770 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.741506 -INFO: TimeDuration, Event = Pool_end, Time = 0.002736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.741525 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.741624 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.741638 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.741659 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.741673 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.741716 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 99.358071, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.783687 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.784615 -INFO: TimeDuration, Event = Add_end, Time = 0.000928 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.784641 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.785508 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.785522 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.788667 -INFO: TimeDuration, Event = Pool_end, Time = 0.003145 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.815617 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.816337 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.816588 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.817236 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.817248 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.820205 -INFO: TimeDuration, Event = Pool_end, Time = 0.002957 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.837987 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.838451 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.838464 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.838800 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352761.851445 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.851752 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.851765 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.851996 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.866163 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.866467 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.866480 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.866709 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.866724 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.869456 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.869476 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.869573 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.869585 -DEBUG: No data movement required - Data on Device 
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.869606 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.869620 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.869670 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.281306, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.912024 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.912948 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.912967 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.913834 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.913847 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.916747 -INFO: TimeDuration, Event = Pool_end, Time = 0.002899 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.943933 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.944654 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.944668 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.945318 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.945329 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.948522 -INFO: TimeDuration, Event = Pool_end, Time = 0.003193 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.966320 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.966786 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.966799 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.967133 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.979778 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.980102 -INFO: TimeDuration, Event = Add_end, Time = 0.000324 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.980115 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.980352 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.994509 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.994842 -INFO: TimeDuration, Event = Add_end, Time = 0.000334 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352761.994856 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352761.995100 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000245 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352761.995120 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352761.997799 -INFO: TimeDuration, Event = Pool_end, Time = 0.002679 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352761.997819 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352761.997916 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352761.997930 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352761.997952 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352761.997965 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352761.998008 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.265552, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.040566 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.041491 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.041507 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.042376 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000868 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.042389 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.045285 -INFO: TimeDuration, Event = Pool_end, Time = 0.002896 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.072448 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.073170 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.073184 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.073846 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000662 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.073858 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.077062 -INFO: TimeDuration, Event = Pool_end, Time = 0.003205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.098408 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.098883 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.098895 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.099238 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000343 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.111986 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.112298 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.112625 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.112861 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.127795 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.128100 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.128113 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.128345 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.128579 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.131093 -INFO: TimeDuration, Event = Pool_end, Time = 0.002513 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.131112 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.131216 -INFO: TimeDuration, Event = Mul_end, Time = 0.000104 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.131229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.131252 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.131265 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.131313 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000048 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 99.779873, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.174283 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.175201 -INFO: TimeDuration, Event = Add_end, Time = 0.000918 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.175225 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.176100 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000875 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.176121 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.180042 -INFO: TimeDuration, Event = Pool_end, Time = 0.003921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.206442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.207161 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.207174 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.207824 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.207837 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.211036 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.230915 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.231381 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.231395 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352762.231728 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.244425 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.244733 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.244745 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.244974 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.259129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.259432 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.259467 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.259697 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.259713 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.262424 -INFO: TimeDuration, Event = Pool_end, Time = 0.002711 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.262444 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.262541 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.262554 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.262575 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.262589 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.262632 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
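The stride and allocation figures that recur throughout this trace (e.g. "nStride = 65536, cStride = 1024, hStride = 32, wStride = 1" with "size_in_bytes = 131072000") follow directly from the NCHW tensor dimensions. A minimal sketch of that arithmetic, assuming fp32 elements (4 bytes, per the "FP32 BASELINE" lines) and a 500x64x32x32 tensor; the struct and function names are illustrative, not the HPVM runtime's actual API:

    // Illustrative sketch, not the runtime's code.
    #include <cstddef>
    #include <cstdio>

    struct Dims4    { size_t n, c, h, w; };
    struct Strides4 { size_t n, c, h, w; };

    // Row-major NCHW strides, matching e.g. "nStride = 65536,
    // cStride = 1024, hStride = 32, wStride = 1" for 500x64x32x32.
    Strides4 nchwStrides(const Dims4 &d) {
      return { d.c * d.h * d.w, d.h * d.w, d.w, 1 };
    }

    // fp32 allocation size, matching "size_in_bytes = 131072000":
    // 500 * 64 * 32 * 32 * 4 bytes.
    size_t sizeInBytesFP32(const Dims4 &d) {
      return d.n * d.c * d.h * d.w * sizeof(float);
    }

    int main() {
      Dims4 conv1_out{500, 64, 32, 32};
      Strides4 s = nchwStrides(conv1_out);
      std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
                  s.n, s.c, s.h, s.w);                      // 65536, 1024, 32, 1
      std::printf("size_in_bytes = %zu\n", sizeInBytesFP32(conv1_out)); // 131072000
    }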
-INFO: current iteration time = 97.970744, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.305095 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.306010 -INFO: TimeDuration, Event = Add_end, Time = 0.000915 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.306026 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.306892 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.306905 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.309812 -INFO: TimeDuration, Event = Pool_end, Time = 0.002907 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.337020 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.337739 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.337752 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.338400 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.338410 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.341615 -INFO: TimeDuration, Event = Pool_end, Time = 0.003205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.359393 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.359859 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.359871 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.360207 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.372837 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.373144 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.373158 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.373393 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.388769 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.389085 -INFO: TimeDuration, Event = Add_end, Time = 0.000316 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.389098 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.389329 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.389346 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.392065 -INFO: TimeDuration, Event = Pool_end, Time = 0.002719 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.392085 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.392202 -INFO: TimeDuration, Event = Mul_end, Time = 0.000118 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.392215 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.392240 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.392254 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.392302 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000048 
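The pooling entries report both the input and output shapes, e.g. "n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16": dim1/dim2 are the input height and width, and the output is halved, consistent with a 2x2 max pool at stride 2. A minimal sketch of that shape arithmetic, with illustrative names (window and stride values are inferred from the halving, not stated in the trace):

    // Illustrative sketch of the pooling-shape arithmetic.
    #include <cstddef>
    #include <cassert>

    struct PoolOut { size_t n, c, h, w; };

    PoolOut maxPoolShape(size_t n, size_t c, size_t dim1, size_t dim2,
                         size_t window = 2, size_t stride = 2) {
      return { n, c, (dim1 - window) / stride + 1,
                     (dim2 - window) / stride + 1 };
    }

    int main() {
      PoolOut o = maxPoolShape(500, 192, 16, 16);   // conv2 output, 2x2 pool
      assert(o.h == 8 && o.w == 8);                 // matches "h = 8, w = 8"
      size_t bytes = o.n * o.c * o.h * o.w * sizeof(float);
      assert(bytes == 24576000);                    // matches the allocation
    }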
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.821664, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.434679 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.435610 -INFO: TimeDuration, Event = Add_end, Time = 0.000931 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.435626 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.436492 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.436505 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.439395 -INFO: TimeDuration, Event = Pool_end, Time = 0.002891 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.466550 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.467270 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.467283 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.467935 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000652 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.467947 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.471142 -INFO: TimeDuration, Event = Pool_end, Time = 0.003194 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.488933 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.489398 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.489411 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.489742 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000331 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.502371 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.502681 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.502694 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.502922 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.517087 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.517391 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.517404 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.517634 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.517651 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.520378 -INFO: TimeDuration, Event = Pool_end, Time = 0.002728 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.520394 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.520680 -INFO: TimeDuration, Event = Mul_end, Time = 0.000286 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.520695 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.520717 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
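The FCLayer entries ("m = 500, n = 10, k = 4096", followed by "CuBlasSgemm" and a 20000-byte output) show the fully connected layer run as a single GEMM: the 500x256x4x4 pool output is flattened to a 500x4096 matrix and multiplied by a 4096x10 weight matrix, giving 500x10 fp32 scores (20000 bytes). A plain-C++ stand-in for the shape bookkeeping, assuming row-major layout; the trace itself delegates the multiply to cuBLAS:

    // Illustrative stand-in for the cuBLAS SGEMM call in the trace.
    #include <vector>
    #include <cassert>

    // C[m x n] = A[m x k] * B[k x n], row-major.
    void gemm(const std::vector<float> &A, const std::vector<float> &B,
              std::vector<float> &C, size_t m, size_t n, size_t k) {
      for (size_t i = 0; i < m; ++i)
        for (size_t j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (size_t p = 0; p < k; ++p) acc += A[i * k + p] * B[p * n + j];
          C[i * n + j] = acc;
        }
    }

    int main() {
      const size_t m = 500, n = 10, k = 256 * 4 * 4;  // k == 4096
      std::vector<float> A(m * k), B(k * n), C(m * n);
      gemm(A, B, C, m, n, k);
      assert(C.size() * sizeof(float) == 20000);      // matches the trace
    }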
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.520730 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.520794 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000064 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.622661, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.563086 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.564009 -INFO: TimeDuration, Event = Add_end, Time = 0.000923 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.564023 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.564890 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.564908 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.567803 -INFO: TimeDuration, Event = Pool_end, Time = 0.002895 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.594965 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.595688 -INFO: TimeDuration, Event = Add_end, Time = 0.000724 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.595709 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.596364 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000655 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.596606 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.599541 -INFO: TimeDuration, Event = Pool_end, Time = 0.002935 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.617325 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.617793 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.617807 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.618142 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352762.630755 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.631070 -INFO: TimeDuration, Event = Add_end, Time = 0.000315 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.631083 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.631312 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.645505 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.645811 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.645824 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.646055 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.646072 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.650977 -INFO: TimeDuration, Event = Pool_end, Time = 0.004905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.651006 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.651120 -INFO: TimeDuration, Event = Mul_end, Time = 0.000114 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.651134 -DEBUG: No data movement required - Data on Device 
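Each iteration ends with "batch_dim = 500, num_classes = 10" and an accuracy percentage over that 500-image batch. A plausible reading, sketched below under the assumption of standard top-1 accuracy (argmax of the 500x10 softmax scores compared against the labels); the function and variable names are illustrative, not the runtime's:

    // Illustrative top-1 accuracy sketch; not the runtime's implementation.
    #include <vector>
    #include <cstdio>

    float batchAccuracy(const std::vector<float> &scores,  // batch_dim x num_classes
                        const std::vector<int> &labels,    // batch_dim
                        size_t batch_dim, size_t num_classes) {
      size_t correct = 0;
      for (size_t i = 0; i < batch_dim; ++i) {
        size_t best = 0;
        for (size_t j = 1; j < num_classes; ++j)
          if (scores[i * num_classes + j] > scores[i * num_classes + best])
            best = j;
        if (static_cast<int>(best) == labels[i]) ++correct;
      }
      return 100.0f * correct / batch_dim;   // reported as e.g. "Accuracy = 78.000000"
    }

    int main() {
      std::vector<float> s(500 * 10, 0.0f);
      std::vector<int>   l(500, 0);
      for (size_t i = 0; i < 500; ++i) s[i * 10] = 1.0f;   // all predict class 0
      std::printf("Accuracy = %f\n", batchAccuracy(s, l, 500, 10));  // 100.000000
    }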
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.651158 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.651171 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.651225 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000054 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 97.439010, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.693601 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.694536 -INFO: TimeDuration, Event = Add_end, Time = 0.000935 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.694560 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.695432 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000873 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.695452 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.699376 -INFO: TimeDuration, Event = Pool_end, Time = 0.003923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.725507 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.726233 -INFO: TimeDuration, Event = Add_end, Time = 0.000726 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.726246 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.726891 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000645 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.726903 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.730097 -INFO: TimeDuration, Event = Pool_end, Time = 0.003194 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.747859 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.748325 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.748585 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.748919 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.761338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.761648 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.761662 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.761890 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.776026 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.776331 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.776341 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.776571 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.776588 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.779322 -INFO: TimeDuration, Event = Pool_end, Time = 0.002734 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.779341 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.779439 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.779452 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.779495 -INFO: TimeDuration, Event = Add_end, Time = 0.000043 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.779510 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.779555 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.706491, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.821648 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.822572 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.822587 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.823448 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.823461 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.826366 -INFO: TimeDuration, Event = Pool_end, Time = 0.002905 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.853546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.854264 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.854278 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.854924 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.854945 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.858158 -INFO: TimeDuration, Event = Pool_end, Time = 0.003213 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.878227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.878713 -INFO: TimeDuration, Event = Add_end, Time = 0.000485 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.878726 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.879062 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.892680 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.892991 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.893005 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.893234 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.907410 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.907712 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.907725 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.907955 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.907972 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.910705 -INFO: TimeDuration, Event = Pool_end, Time = 0.002733 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352762.910724 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352762.910821 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.910833 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.910855 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352762.910869 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352762.910913 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.897620, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.954437 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.955363 -INFO: TimeDuration, Event = Add_end, Time = 0.000926 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.955385 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.956259 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000873 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.956280 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.960210 -INFO: TimeDuration, Event = Pool_end, Time = 0.003929 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352762.986405 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352762.987124 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352762.987138 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352762.987782 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000644 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352762.987793 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352762.990995 -INFO: TimeDuration, Event = Pool_end, Time = 0.003202 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.008766 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.009232 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.009245 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352763.009578 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.022225 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.022531 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.022544 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.022772 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.036948 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.037250 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.037262 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.037489 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.037505 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.040241 -INFO: TimeDuration, Event = Pool_end, Time = 0.002736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352763.040260 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352763.040573 -INFO: TimeDuration, Event = Mul_end, Time = 0.000313 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.040588 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.040613 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352763.040627 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352763.040671 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 95.797705, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.082932 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.083861 -INFO: TimeDuration, Event = Add_end, Time = 0.000928 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.083875 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.084743 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000868 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.084759 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.087653 -INFO: TimeDuration, Event = Pool_end, Time = 0.002895 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.114835 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.115554 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.115568 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.116218 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.116240 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.122586 -INFO: TimeDuration, Event = Pool_end, Time = 0.006346 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.140414 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.140884 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.140898 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.141232 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.154132 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.154441 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.154454 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.154682 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.168994 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.169302 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.169316 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.169547 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.169565 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.172287 -INFO: TimeDuration, Event = Pool_end, Time = 0.002722 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352763.172313 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352763.172415 -INFO: TimeDuration, Event = Mul_end, Time = 0.000102 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.172429 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.172455 -INFO: TimeDuration, Event = Add_end, Time = 0.000026 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352763.172469 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352763.172524 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000055 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 99.075251, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.223305 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.224260 -INFO: TimeDuration, Event = Add_end, Time = 0.000955 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.224294 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.225433 -INFO: TimeDuration, Event = Tanh_end, Time = 0.001139 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.225453 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.229013 -INFO: TimeDuration, Event = Pool_end, Time = 0.003560 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.255877 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.256614 -INFO: TimeDuration, Event = Add_end, Time = 0.000737 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.256633 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.257283 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.257298 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.261291 -INFO: TimeDuration, Event = Pool_end, Time = 0.003993 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.279264 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.279736 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.279753 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.280091 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.293068 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.293381 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000313 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.293397 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.293628 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.309391 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.309704 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.309721 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.309953 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.309974 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.312689 -INFO: TimeDuration, Event = Pool_end, Time = 0.002714 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352763.312713 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352763.312819 -INFO: TimeDuration, Event = Mul_end, Time = 0.000107 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.312835 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.312859 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352763.312876 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352763.312926 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 102.903166, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.359663 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.360588 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.360604 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.361474 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000870 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.361529 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.364384 -INFO: TimeDuration, Event = Pool_end, Time = 0.002856 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: 
Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.391633 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.392482 -INFO: TimeDuration, Event = Add_end, Time = 0.000849 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.392494 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.393146 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.393159 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.396225 -INFO: TimeDuration, Event = Pool_end, Time = 0.003067 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.416102 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.416570 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.416598 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.416935 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd 
-INFO: AbsoluteTime, Event = Add, Time = 1607352763.429553 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.429863 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.429877 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.430105 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.445713 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.446021 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352763.446035 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352763.446266 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352763.446283 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352763.449006 -INFO: TimeDuration, Event = Pool_end, Time = 0.002723 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352763.449026 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352763.449124 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352763.449137 -DEBUG: No data movement required - Data on 
Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352763.449159
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352763.449172
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352763.449223
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000051
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 81.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 98.996541, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next 500-image batch; every iteration in this deleted log repeats the same structure, with only event timestamps and durations changing:
  input: 6144000 bytes (500x3x32x32 fp32) moved host to GPU
  conv1 (fp32 baseline) + add (bias 64) + tanh + pool_max: 500x64x32x32 -> 500x64x16x16
  conv2 + add (bias 192) + tanh + pool_max: 500x192x16x16 -> 500x192x8x8
  conv3 + add (bias 384) + tanh: 500x384x8x8
  conv4 + add (bias 256) + tanh: 500x256x8x8
  conv5 + add (bias 256) + tanh + pool_max: 500x256x8x8 -> 500x256x4x4
  fc (CuBlasSgemm, m = 500, n = 10, k = 4096) + add (bias 10) + softmax: 20000 bytes moved GPU to host]
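The stride and size lines repeated throughout this trace follow directly from a dense NCHW fp32 layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N*C*H*W*sizeof(float). A minimal sketch (not the HPVM runtime's own code) that reproduces the printed values:

#include <cstdio>

struct Dims { long n, c, h, w; };

// Compute the NCHW strides and allocation size the log prints for a tensor.
static void describe(Dims d) {
    long wStride = 1;
    long hStride = d.w;
    long cStride = d.h * d.w;
    long nStride = d.c * cStride;
    long bytes   = d.n * nStride * (long)sizeof(float);
    std::printf("nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld, "
                "size_in_bytes = %ld\n",
                nStride, cStride, hStride, wStride, bytes);
}

int main() {
    describe({500, 3, 32, 32});   // input batch: 3072/1024/32/1, 6144000 bytes
    describe({500, 64, 32, 32});  // conv1 output: 65536/1024/32/1, 131072000 bytes
    describe({500, 192, 16, 16}); // conv2 output: 49152/256/16/1, 98304000 bytes
    return 0;
}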
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.494759, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next batch, identical structure and shapes; only timestamps and durations differ]
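Each pool_max stage in the trace halves the spatial dimensions (dim1 x dim2 are the input sizes, h and w the outputs: 32 -> 16, 16 -> 8, 8 -> 4), consistent with a 2x2 window at stride 2; the window size itself is not printed, so that part is an assumption. A sketch reproducing the pooled allocation sizes:

#include <cstdio>

int main() {
    // Inputs to the three pool_max stages seen in every iteration.
    struct { long n, c, in_h, in_w; } pools[] = {
        {500, 64, 32, 32},   // after conv1: -> 16x16, 32768000 bytes
        {500, 192, 16, 16},  // after conv2: -> 8x8, 24576000 bytes
        {500, 256, 8, 8},    // after conv5: -> 4x4, 8192000 bytes
    };
    for (auto &p : pools) {
        long h = p.in_h / 2, w = p.in_w / 2;   // spatial halving, as in the log
        long bytes = p.n * p.c * h * w * (long)sizeof(float);
        std::printf("n = %ld, c = %ld, h = %ld, w = %ld, dim1 = %ld, dim2 = %ld, "
                    "size_in_bytes = %ld\n",
                    p.n, p.c, h, w, p.in_h, p.in_w, bytes);
    }
    return 0;
}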
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 97.808945, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next batch, identical structure and shapes; only timestamps and durations differ]
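The FCLayer records show the pooled 500x256x4x4 activations treated as a 500x4096 matrix and multiplied against a 4096x10 weight matrix (m = 500, n = 10, k = 4096), producing the 500x10 fp32 tensor of 20000 bytes that the runtime allocates before CuBlasSgemm. A plain reference GEMM with the same shapes, standing in for the CUDA call:

#include <vector>
#include <cstdio>

// Row-major reference GEMM: C[m x n] = A[m x k] * B[k x n].
static void gemm(const float *A, const float *B, float *C,
                 int m, int n, int k) {
    for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
            float acc = 0.0f;
            for (int p = 0; p < k; ++p)
                acc += A[i * k + p] * B[p * n + j];
            C[i * n + j] = acc;
        }
}

int main() {
    const int m = 500, n = 10, k = 4096;  // values printed in the trace
    std::vector<float> A(m * k, 0.01f), B(k * n, 0.02f), C(m * n);
    gemm(A.data(), B.data(), C.data(), m, n, k);
    std::printf("output bytes = %zu\n", C.size() * sizeof(float)); // 20000
    return 0;
}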
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 99.066627, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next batch, identical structure and shapes; only timestamps and durations differ]
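The accuracy lines are a straightforward top-1 comparison over the 500x10 softmax output copied back to the host: take the argmax of each row and compare it to the reference label. A sketch with illustrative names (outputs and labels are not the runtime's identifiers):

#include <vector>
#include <cstdio>

// Top-1 accuracy over a batch: argmax of each row vs. the reference label.
static float batch_accuracy(const std::vector<float> &outputs,
                            const std::vector<int> &labels,
                            int batch_dim, int num_classes) {
    int correct = 0;
    for (int i = 0; i < batch_dim; ++i) {
        int best = 0;
        for (int c = 1; c < num_classes; ++c)
            if (outputs[i * num_classes + c] > outputs[i * num_classes + best])
                best = c;
        if (best == labels[i]) ++correct;
    }
    return 100.0f * correct / batch_dim;
}

int main() {
    const int batch_dim = 500, num_classes = 10;  // as printed in the log
    std::vector<float> outputs(batch_dim * num_classes, 0.1f);
    std::vector<int> labels(batch_dim, 0);
    std::printf("****** Accuracy = %f\n",
                batch_accuracy(outputs, labels, batch_dim, num_classes));
    return 0;
}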
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.607040, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next batch, identical structure and shapes; only timestamps and durations differ]
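The alternating "No data movement required" and "Moving N bytes" lines reflect per-tensor placement tracking: a transfer is issued only when a tensor's current location differs from where the next operation needs it. A minimal model of that bookkeeping, with illustrative names rather than the runtime's real identifiers:

#include <cstdio>

enum class Placement { Host, Device };

struct TrackedTensor {
    Placement where;  // where the current copy of the data lives
    long bytes;
};

// Ensure the tensor is on `target`, copying only if it is not already there.
static void require_on(TrackedTensor &t, Placement target) {
    if (t.where == target) {
        std::printf("DEBUG: No data movement required - Data on %s\n",
                    target == Placement::Device ? "Device" : "Host");
        return;
    }
    std::printf("INFO: Moving %ld bytes from %s\n", t.bytes,
                target == Placement::Device ? "host to GPU" : "GPU to host");
    t.where = target;  // the real runtime would perform a cudaMemcpy here
}

int main() {
    TrackedTensor out{Placement::Host, 20000};
    require_on(out, Placement::Device); // triggers a host-to-GPU copy
    require_on(out, Placement::Device); // second call: no movement required
    return 0;
}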
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 80.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 100.381268, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[per-layer trace for the next batch, identical structure and shapes; only timestamps and durations differ]
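The AbsoluteTime/TimeDuration pairs bracket each tensor operation: a wall-clock timestamp is logged at the start event ("Add") and the end event ("Add_end"), and the printed duration is their difference. A sketch of that pattern using gettimeofday, which matches the seconds.microseconds format in the log (the runtime's actual timer may differ):

#include <cstdio>
#include <sys/time.h>

// Wall-clock time in seconds, with microsecond resolution.
static double absolute_time() {
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    return tv.tv_sec + tv.tv_usec * 1e-6;
}

int main() {
    double start = absolute_time();
    std::printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", start);
    // ... run the tensor operation here ...
    double end = absolute_time();
    std::printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
    std::printf("INFO: TimeDuration, Event = Add_end, Time = %f\n", end - start);
    return 0;
}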
Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.235217 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.235232 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.235278 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 103.250393, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.280891 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.281814 -INFO: TimeDuration, Event = Add_end, Time = 0.000923 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.281829 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.282708 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000878 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.282722 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.285600 -INFO: TimeDuration, Event = Pool_end, Time = 0.002878 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required 
- Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.313607 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.314329 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.314343 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.314989 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.315001 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.318179 -INFO: TimeDuration, Event = Pool_end, Time = 0.003178 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.340876 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.341348 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.341361 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.341700 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000339 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, 
wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.354364 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.354675 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.354688 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.354919 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.370463 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.370771 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.370785 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.371018 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.371033 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.373757 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352764.373777 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No 
data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352764.373875 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.373887 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.373909 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.373923 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.373973 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 105.513650, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.418438 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.419366 -INFO: TimeDuration, Event = Add_end, Time = 0.000927 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.419381 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.420244 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.420259 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.423154 -INFO: TimeDuration, Event = Pool_end, Time = 0.002895 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.450311 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.451033 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.451046 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.451695 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.451707 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.454908 -INFO: TimeDuration, Event = Pool_end, Time = 0.003201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.472676 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.473140 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.473153 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.473485 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - 
ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.486151 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.486461 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.486473 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.486703 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.501114 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.501421 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.501433 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.501667 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.501683 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.504407 -INFO: TimeDuration, Event = Pool_end, Time = 0.002725 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352764.504423 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: 
***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352764.504521 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.504534 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.504556 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.504569 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.504612 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.657802, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.547469 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.548394 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.548604 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.549467 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.549483 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: 
Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.553246 -INFO: TimeDuration, Event = Pool_end, Time = 0.003763 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.579344 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.580065 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.580079 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.580729 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.580742 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.583938 -INFO: TimeDuration, Event = Pool_end, Time = 0.003196 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.601696 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.602162 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.602175 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, 
Event = Tanh_end, Time = 1607352764.602511 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.615118 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.615425 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.615437 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.615664 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.631262 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.631570 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.631583 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.631814 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.631831 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.634553 -INFO: TimeDuration, Event = Pool_end, Time = 0.002722 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352764.634574 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352764.634673 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.634686 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.634707 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.634720 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.634764 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 96.638468, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.681282 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.682210 -INFO: TimeDuration, Event = Add_end, Time = 0.000928 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.682224 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.683086 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.683101 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.686001 -INFO: TimeDuration, Event = Pool_end, Time = 0.002900 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.714161 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.714883 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.714897 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.715544 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.715556 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.718743 -INFO: TimeDuration, Event = Pool_end, Time = 0.003186 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.741343 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.741813 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.741827 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.742168 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000341 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.754797 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.755108 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.755120 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.755351 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.770921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.771230 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.771244 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.771478 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.771495 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.774213 -INFO: TimeDuration, Event = Pool_end, Time = 0.002718 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352764.774234 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352764.774333 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.774346 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.774369 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.774382 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.774427 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 103.794670, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.814559 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.815485 -INFO: TimeDuration, Event = Add_end, Time = 0.000926 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.815499 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.816366 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.816379 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.819292 -INFO: TimeDuration, Event = Pool_end, Time = 0.002913 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.846420 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.847141 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.847154 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.847802 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.847813 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.851014 -INFO: TimeDuration, Event = Pool_end, Time = 0.003201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.868788 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.869254 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.869267 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.869602 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.882255 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.882565 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.882577 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.882806 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.896978 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.897281 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.897294 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.897523 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.897539 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.900271 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352764.900289 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352764.900409 -INFO: TimeDuration, Event = Mul_end, Time = 0.000119 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.900419 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.900440 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352764.900450 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352764.900492 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.550525, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.941410 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.942348 -INFO: TimeDuration, Event = Add_end, Time = 0.000939 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.942370 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.943240 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000870 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.943259 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.946131 -INFO: TimeDuration, Event = Pool_end, Time = 0.002872 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352764.973617 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352764.974334 -INFO: TimeDuration, Event = Add_end, Time = 0.000717 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352764.974358 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352764.975007 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352764.975025 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352764.978199 -INFO: TimeDuration, Event = Pool_end, Time = 0.003174 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.002045 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.002517 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.002535 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.002874 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000339 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352765.015818 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.016129 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.016149 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.016382 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.031890 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.032193 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.032207 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.032436 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.032453 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.035185 -INFO: TimeDuration, Event = Pool_end, Time = 0.002732 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352765.035204 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352765.035317 -INFO: TimeDuration, Event = Mul_end, Time = 0.000114 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.035332 -DEBUG: No data movement required - Data on Device 
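The stride and size values printed throughout these traces follow exactly from a dense NCHW fp32 layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N * nStride * sizeof(float). The pooling lines ("h = 16 ... dim1 = 32") likewise show each 2x2 max pool halving both spatial dimensions. A minimal C++ sketch of that arithmetic follows; the Dims struct and describe() helper are illustrative names, not the runtime's allocator code.

    // Sketch only: reproduces the stride/size numbers printed in the trace,
    // assuming a dense NCHW fp32 tensor.
    #include <cstddef>
    #include <cstdio>

    struct Dims { size_t n, c, h, w; };

    static void describe(Dims d) {
        size_t wStride = 1;
        size_t hStride = d.w;            // one row
        size_t cStride = d.h * d.w;      // one channel plane
        size_t nStride = d.c * cStride;  // one image
        size_t bytes   = d.n * nStride * sizeof(float);
        std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, "
                    "size_in_bytes = %zu\n",
                    nStride, cStride, hStride, wStride, bytes);
    }

    int main() {
        describe({500, 3, 32, 32});   // input batch  -> 3072/1024/32/1, 6144000 bytes
        describe({500, 64, 32, 32});  // conv1 output -> 65536/1024/32/1, 131072000 bytes
        describe({500, 64, 16, 16});  // pool1 output -> 16384/256/16/1, 32768000 bytes
    }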
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.035355 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352765.035369 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352765.035419 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 104.189396, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.077808 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.078728 -INFO: TimeDuration, Event = Add_end, Time = 0.000920 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.078746 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.079609 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.079623 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.082536 -INFO: TimeDuration, Event = Pool_end, Time = 0.002913 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.109579 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.110289 -INFO: TimeDuration, Event = Add_end, Time = 0.000711 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.110305 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.110952 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.110964 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.114155 -INFO: TimeDuration, Event = Pool_end, Time = 0.003190 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.131860 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.132322 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.132440 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.132774 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.145243 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.145548 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.145561 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.145790 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.159925 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.160225 -INFO: TimeDuration, Event = Add_end, Time = 0.000300 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.160239 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.160467 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.160485 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.163222 -INFO: TimeDuration, Event = Pool_end, Time = 0.002737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352765.163241 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352765.163332 -INFO: TimeDuration, Event = Mul_end, Time = 0.000091 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.163345 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.163366 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352765.163380 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352765.163425 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 94.991052, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.206987 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.207901 -INFO: TimeDuration, Event = Add_end, Time = 0.000914 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.207918 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.208776 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000858 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.208793 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.211711 -INFO: TimeDuration, Event = Pool_end, Time = 0.002918 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.238780 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.239488 -INFO: TimeDuration, Event = Add_end, Time = 0.000708 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.239503 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.240149 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.240161 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.245465 -INFO: TimeDuration, Event = Pool_end, Time = 0.005304 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.263153 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.263616 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.263631 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.263962 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000331 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.276576 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.276880 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.276895 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.277125 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.292295 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.292607 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.292622 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.292851 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.292869 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.295589 -INFO: TimeDuration, Event = Pool_end, Time = 0.002720 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352765.295609 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352765.295706 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.295719 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.295741 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352765.295755 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352765.295801 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.236989, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.344780 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.345721 -INFO: TimeDuration, Event = Add_end, Time = 0.000940 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.345750 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.346626 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000875 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.346652 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.350517 -INFO: TimeDuration, Event = Pool_end, Time = 0.003865 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.376643 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.377364 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.377378 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.378026 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.378037 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.381235 -INFO: TimeDuration, Event = Pool_end, Time = 0.003198 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.402642 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.403112 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.403125 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352765.403460 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.416328 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.416636 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.416649 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.416880 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.431136 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.431442 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.431456 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.431686 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.431705 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.434428 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352765.434448 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352765.434545 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.434557 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.434578 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352765.434590 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352765.434639 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
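Each "****** Accuracy = ..." line closes one 500-image batch: the 500x10 softmax scores (the 20000 bytes of fp32 moved from GPU to host) are compared against the labels, and then findNextConfiguration / findTargetConfiguration re-select an approximation configuration before the next batch (always index 0 in this run, which pins the single fp32 baseline conf). An accuracy of 80.400002, for example, corresponds to 402 of 500 correct. A hedged C++ sketch of that reduction, assuming standard top-1 accuracy (top1_accuracy is an illustrative name, not the runtime's own function):

    // Sketch only: top-1 accuracy over host-side scores, as implied by
    // "batch_dim = 500, num_classes = 10". 402/500 correct -> 80.4.
    float top1_accuracy(const float* scores, const int* labels,
                        int batch_dim, int num_classes) {
        int correct = 0;
        for (int i = 0; i < batch_dim; ++i) {
            const float* row = scores + i * num_classes;
            int best = 0;
            for (int c = 1; c < num_classes; ++c)
                if (row[c] > row[best]) best = c;   // argmax over class scores
            if (best == labels[i]) ++correct;
        }
        return 100.0f * correct / batch_dim;
    }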
-INFO: current iteration time = 101.535042, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.481825 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.482751 -INFO: TimeDuration, Event = Add_end, Time = 0.000926 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.482766 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.483624 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000857 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.483638 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.486544 -INFO: TimeDuration, Event = Pool_end, Time = 0.002907 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.514478 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.515202 -INFO: TimeDuration, Event = Add_end, Time = 0.000724 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.515216 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.515866 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.515878 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.519074 -INFO: TimeDuration, Event = Pool_end, Time = 0.003196 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.537973 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.538440 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.538454 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.538792 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.551739 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.552052 -INFO: TimeDuration, Event = Add_end, Time = 0.000312 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.552064 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.552295 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.570798 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.571105 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352765.571119 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352765.571350 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352765.571368 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352765.574094 -INFO: TimeDuration, Event = Pool_end, Time = 0.002726 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352765.574132 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352765.574233 -INFO: TimeDuration, Event = Mul_end, Time = 0.000101 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352765.574246 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352765.574270 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352765.574283 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352765.574335 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000052 
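The FC layer logged above is a single SGEMM with m = 500 (batch), n = 10 (classes), and k = 4096, where 4096 is the 256 x 4 x 4 output of the final pooling stage flattened per image; the product is the 500x10 score tensor (20000 bytes) that the softmax then consumes. The runtime dispatches this to cuBLAS ("CuBlasSgemm" in the trace); the naive row-major reference below only illustrates the shapes (fc_reference is a hypothetical name):

    // Sketch only: same shapes as the logged GEMM (m = 500, n = 10, k = 4096).
    void fc_reference(const float* x,  // m x k flattened activations
                      const float* w,  // k x n weights
                      float* out,      // m x n scores (500 * 10 * 4 = 20000 bytes)
                      int m, int n, int k) {
        for (int i = 0; i < m; ++i)
            for (int j = 0; j < n; ++j) {
                float acc = 0.0f;
                for (int p = 0; p < k; ++p)
                    acc += x[i * k + p] * w[p * n + j];
                out[i * n + j] = acc;
            }
    }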
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 102.825489, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[The deleted log repeats an identical FP32-baseline trace for every batch of 500 inputs: five ConvLayer blocks (conv, add, tanh, with pool_max after layers 1, 2, and 5; bias sizes 64, 192, 384, 256, 256), one FCLayer (TensorGemmGPU with m = 500, n = 10, k = 4096, CuBlasSgemm), and a final TensorSoftmax. Only the timestamps and the per-batch results below differ.]
-****** Accuracy = 78.599998
-INFO: current iteration time = 95.877844, current iteration energy = 0.000000
-****** Accuracy = 76.800003
-INFO: current iteration time = 95.658047, current iteration energy = 0.000000
-****** Accuracy = 78.000000
-INFO: current iteration time = 95.590670, current iteration energy = 0.000000
-****** Accuracy = 81.000000
-INFO: current iteration time = 96.573635, current iteration energy = 0.000000
-****** Accuracy = 79.800003
-INFO: current iteration time = 102.577298, current iteration energy = 0.000000
-****** Accuracy = 78.000000
-INFO: current iteration time = 100.149276, current iteration energy = 0.000000
[The identical trace for the next batch follows, cut off mid-statement at this point:]
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.470354
-INFO: TimeDuration, Event =
Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.470366 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.470596 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.484875 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.485181 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.485194 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.485424 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.485441 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.488168 -INFO: TimeDuration, Event = Pool_end, Time = 0.002727 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352766.488186 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352766.488285 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.488299 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.488326 -INFO: TimeDuration, Event = Add_end, Time = 0.000027 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
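The FCLayer reduces to one SGEMM: m = 500 images, k = 4096 flattened features, n = 10 class scores, so the freshly created result tensor is 500 x 10 floats = 20000 bytes, exactly the allocation logged before the "CuBlasSgemm *" line above. A hedged sketch of such a call follows; cuBLAS is column-major, so one common way to get a row-major C = A*B is to request B*A with the dimensions swapped. This shows the shape of the call, not the runtime's exact invocation:

    #include <cublas_v2.h>

    // Row-major C (m x n) = A (m x k) * B (k x n) on the GPU.
    // Passing the operands swapped makes column-major cuBLAS compute
    // C^T = B^T * A^T, which is the row-major product we want.
    void fcForward(cublasHandle_t handle,
                   const float *A, const float *B, float *C,
                   int m, int n, int k) {
      const float alpha = 1.0f, beta = 0.0f;
      cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                  n, m, k,
                  &alpha, B, n,   // leading dimensions are the row-major widths
                  A, k,
                  &beta, C, n);
    }

With m = 500, n = 10, k = 4096 this is a tiny GEMM, which is why Mul_end reports roughly 0.0001 s against the millisecond-scale convolution and pooling steps.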
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352766.488573 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352766.488618 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.150039, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.526119 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.527046 -INFO: TimeDuration, Event = Add_end, Time = 0.000927 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.527061 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.527926 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.527939 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.530840 -INFO: TimeDuration, Event = Pool_end, Time = 0.002901 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.558024 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.558744 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.558758 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.559403 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000645 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.559415 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.562614 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.580396 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.580863 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.580877 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.581213 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352766.593980 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.594291 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.594305 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.594536 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.608694 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.608997 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.609010 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.609240 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.609256 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.611990 -INFO: TimeDuration, Event = Pool_end, Time = 0.002734 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352766.612010 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352766.612107 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.612119 -DEBUG: No data movement required - Data on Device 
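The pooling lines also encode their own arithmetic: inputs span dim1 x dim2, outputs span h x w, and throughout the trace the extents halve (32 -> 16, 16 -> 8, 8 -> 4), consistent with a non-overlapping 2x2, stride-2 max pool. The window size is an inference from these numbers, not stated in the log; a sketch under that assumption:

    #include <cstdio>

    // Standard pooled-extent formula: out = (in + 2*p - k) / s + 1.
    static int poolOut(int in, int k, int s, int p) {
      return (in + 2 * p - k) / s + 1;
    }

    int main() {
      // k = 2, s = 2, p = 0 reproduces every pool in this trace.
      std::printf("dim1 = 32 -> h = %d\n", poolOut(32, 2, 2, 0)); // 16
      std::printf("dim1 = 16 -> h = %d\n", poolOut(16, 2, 2, 0)); // 8
      std::printf("dim1 =  8 -> h = %d\n", poolOut(8, 2, 2, 0));  // 4
    }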
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.612143 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352766.612157 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352766.612200 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.937245, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.654583 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.655507 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.655523 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.656394 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000871 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.656619 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.660551 -INFO: TimeDuration, Event = Pool_end, Time = 0.003933 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.686487 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.687209 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.687222 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.687872 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.687882 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.691078 -INFO: TimeDuration, Event = Pool_end, Time = 0.003196 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.708854 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.709320 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.709333 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.709665 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.722330 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.722661 -INFO: TimeDuration, Event = Add_end, Time = 0.000331 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.722674 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.722901 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000226 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.737066 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.737370 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.737382 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.737614 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.737632 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.740360 -INFO: TimeDuration, Event = Pool_end, Time = 0.002728 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352766.740582 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352766.740684 -INFO: TimeDuration, Event = Mul_end, Time = 0.000102 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.740697 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.740719 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352766.740734 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352766.740778 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 98.778130, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.778004 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.778930 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.778944 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.779805 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000860 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.779817 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.782726 -INFO: TimeDuration, Event = Pool_end, Time = 0.002909 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.809920 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.810643 -INFO: TimeDuration, Event = Add_end, Time = 0.000724 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.810656 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.811304 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.811316 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.814507 -INFO: TimeDuration, Event = Pool_end, Time = 0.003191 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.832279 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.832746 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.832760 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.833092 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.845757 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.846065 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.846078 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.846307 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.860487 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.860791 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.860806 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.861036 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.861052 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.863790 -INFO: TimeDuration, Event = Pool_end, Time = 0.002738 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352766.863810 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352766.863908 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.863921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.863944 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352766.863959 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352766.864011 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000052 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.885247, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.901161 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.902086 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.902100 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.902962 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.902975 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.905877 -INFO: TimeDuration, Event = Pool_end, Time = 0.002902 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.933083 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.933803 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.933817 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.934464 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.934475 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.937675 -INFO: TimeDuration, Event = Pool_end, Time = 0.003200 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.955447 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.955913 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.955927 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352766.956261 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.968950 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.969259 -INFO: TimeDuration, Event = Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.969271 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.969500 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.983642 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.983946 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352766.983958 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352766.984188 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352766.984204 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352766.986954 -INFO: TimeDuration, Event = Pool_end, Time = 0.002750 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352766.986974 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352766.987072 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352766.987084 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352766.987106 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352766.987118 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352766.987161 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
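Each batch ends the same way: the 500 x 10 softmax result is the only tensor moved back to the host ("Moving 20000 bytes from GPU to host"), and the reported accuracy is a plain top-1 score over the batch; 78.599998, for example, is 393/500 correct. A minimal sketch of that check (top1Accuracy and its arguments are illustrative names, not the runtime's API):

    #include <cstddef>

    // Argmax each row of the batch x classes probability matrix and
    // count matches against the ground-truth labels.
    float top1Accuracy(const float *probs, const unsigned *labels,
                       size_t batch, size_t classes) {
      size_t correct = 0;
      for (size_t i = 0; i < batch; ++i) {
        size_t best = 0;
        for (size_t c = 1; c < classes; ++c)
          if (probs[i * classes + c] > probs[i * classes + best])
            best = c;
        if (best == labels[i])
          ++correct;
      }
      return 100.0f * static_cast<float>(correct) / static_cast<float>(batch);
    }

The findNextConfiguration / findTargetConfiguration lines after each accuracy report are the dynamic approximation controller re-selecting a knob configuration for the next batch; with goalVal 0.000000 and a single active configuration, configurationIdx stays at 0 on every iteration.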
-INFO: current iteration time = 95.788767, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.024164 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.025084 -INFO: TimeDuration, Event = Add_end, Time = 0.000921 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.025101 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.025959 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000858 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.025974 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.028907 -INFO: TimeDuration, Event = Pool_end, Time = 0.002933 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.056052 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.056775 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.056791 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.057435 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000644 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.057447 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.060643 -INFO: TimeDuration, Event = Pool_end, Time = 0.003196 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.078396 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.078862 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.078875 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.079210 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.091865 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.092172 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.092184 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.092413 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
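Every pool_max trace halves the spatial extent (32x32 -> 16x16, 16x16 -> 8x8, 8x8 -> 4x4): dim1/dim2 are the input height/width and n/c/h/w the output shape, consistent with a 2x2 window at stride 2 and no padding. The standard output-size formula, as a sketch (window, stride, and padding are inferred from the trace rather than read from the benchmark config):

// out = (in + 2*pad - window) / stride + 1   (integer division)
int pool_out_dim(int in, int window, int stride, int pad) {
  return (in + 2 * pad - window) / stride + 1;
}
// pool_out_dim(32, 2, 2, 0) == 16
// pool_out_dim(16, 2, 2, 0) == 8
// pool_out_dim(8,  2, 2, 0) == 4   -- matching the h/w values logged above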
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.106608 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.106911 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.106923 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.107151 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.107167 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.109903 -INFO: TimeDuration, Event = Pool_end, Time = 0.002736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.109923 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.110020 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.110032 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.110053 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.110067 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.110109 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.448193, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.148316 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.149239 -INFO: TimeDuration, Event = Add_end, Time = 0.000923 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.149255 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.150118 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.150130 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.153029 -INFO: TimeDuration, Event = Pool_end, Time = 0.002898 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.180239 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
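Each iteration ends by copying the 500x10 softmax scores to the host (20000 bytes = 500 * 10 * sizeof(float)) and reporting top-1 accuracy over the 500-image batch, so e.g. 78.599998 corresponds to 393/500 correct. A minimal host-side sketch of that check, assuming row-major scores and integer ground-truth labels (names are illustrative):

#include <cstddef>

// Top-1 accuracy: fraction of rows whose argmax matches the label.
float top1_accuracy(const float *scores, const int *labels,
                    int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    const float *row = scores + (size_t)i * num_classes;
    int best = 0;
    for (int c = 1; c < num_classes; ++c)
      if (row[c] > row[best]) best = c;
    if (best == labels[i]) ++correct;
  }
  return 100.0f * correct / batch_dim;   // 393 correct of 500 -> 78.6
}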
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.180961 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.181097 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.181738 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000641 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.181749 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.184830 -INFO: TimeDuration, Event = Pool_end, Time = 0.003081 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.202612 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.203077 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.203090 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.203425 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.216095 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.216405 -INFO: TimeDuration, Event = 
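In every TensorAdd trace, x->num_elems is the full N*C*H*W element count while bias->num_elems equals the channel count alone (64, 192, 384, 256, or 10): one bias value per output channel, broadcast across the batch and spatial dimensions. A CPU reference for that broadcast, as a sketch (the runtime performs the equivalent on the GPU):

#include <cstddef>

// Per-channel bias add over a dense NCHW tensor:
//   x[n][c][h][w] += bias[c]  for all n, h, w.
void add_channel_bias(float *x, const float *bias,
                      int N, int C, int H, int W) {
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c) {
      const float b = bias[c];
      float *plane = x + ((size_t)n * C + c) * (size_t)H * W;
      for (int i = 0; i < H * W; ++i)
        plane[i] += b;
    }
}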
Add_end, Time = 0.000310 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.216572 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.216801 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.230841 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.231147 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.231159 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.231391 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.231436 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.234133 -INFO: TimeDuration, Event = Pool_end, Time = 0.002697 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.234156 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.234255 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.234267 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.234289 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.234303 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.234347 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.530501, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.271996 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.272925 -INFO: TimeDuration, Event = Add_end, Time = 0.000929 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.272944 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.273808 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.273823 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.276716 -INFO: TimeDuration, Event = Pool_end, Time = 0.002893 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.303833 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.304554 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.304565 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.305220 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000655 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.305232 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.308445 -INFO: TimeDuration, Event = Pool_end, Time = 0.003213 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.330767 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.331236 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.331250 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.331585 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352767.344384 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.344692 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.344705 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.344935 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.360666 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.360977 -INFO: TimeDuration, Event = Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.360991 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.361221 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.361240 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.363960 -INFO: TimeDuration, Event = Pool_end, Time = 0.002720 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.363980 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.364078 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.364091 -DEBUG: No data movement required - Data on Device 
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.364112 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.364125 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.364169 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 101.741349, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.411455 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.412441 -INFO: TimeDuration, Event = Add_end, Time = 0.000986 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.412601 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.413465 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000864 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.413481 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.418282 -INFO: TimeDuration, Event = Pool_end, Time = 0.004802 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
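The AbsoluteTime / TimeDuration pairs bracket every tensor op, and each *_end TimeDuration is just the difference of the two wall-clock stamps (the first FCLayer Mul above: 1607352766.987072 - 1607352766.986974 = 0.000098). One plausible way such epoch-seconds-with-microseconds stamps are produced, assuming POSIX gettimeofday — the runtime's own profiler may use a different clock:

#include <sys/time.h>

// Wall-clock seconds with microsecond resolution, matching the
// "AbsoluteTime" format in the trace (epoch seconds + fraction).
double absolute_time() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return tv.tv_sec + tv.tv_usec * 1e-6;
}

// double t0 = absolute_time();
// ... run the tensor op ...
// double dt = absolute_time() - t0;   // logged as TimeDuration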
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.444376 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.445096 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.445110 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.445757 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.445770 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.448947 -INFO: TimeDuration, Event = Pool_end, Time = 0.003178 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.466719 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.467183 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.467196 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.467531 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.480204 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.480512 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.480564 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.480793 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.494927 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.495232 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.495245 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.495477 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.495494 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.498220 -INFO: TimeDuration, Event = Pool_end, Time = 0.002726 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.498240 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.498340 -INFO: TimeDuration, Event = Mul_end, Time = 0.000100 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.498353 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.498375 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.498388 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.498439 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 101.353540, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.535699 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.536616 -INFO: TimeDuration, Event = Add_end, Time = 0.000917 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.536633 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.537499 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.537513 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.540390 -INFO: TimeDuration, Event = Pool_end, Time = 0.002877 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.567614 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.568334 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.568586 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.569231 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000645 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.569243 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.572201 -INFO: TimeDuration, Event = Pool_end, Time = 0.002958 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.593885 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.594354 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.594368 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.594706 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 
- BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.607598 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.607905 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.607918 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.608146 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.623477 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.623785 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.623799 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.624030 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.624047 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.626771 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.626791 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes 
= 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.626889 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.626901 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.626923 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.626935 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.626984 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 100.841580, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.669597 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.670508 -INFO: TimeDuration, Event = Add_end, Time = 0.000911 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.670524 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.671385 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.671400 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.676480 -INFO: TimeDuration, Event = Pool_end, Time = 0.005080 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.702129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.702847 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.702861 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.703506 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.703517 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.706702 -INFO: TimeDuration, Event = Pool_end, Time = 0.003184 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.724575 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.725039 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.725051 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352767.725383 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.743256 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.743573 -INFO: TimeDuration, Event = Add_end, Time = 0.000317 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.743587 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.743814 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.758050 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.758357 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.758371 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.758602 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.758621 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.761341 -INFO: TimeDuration, Event = Pool_end, Time = 0.002721 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.761362 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.761461 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.761473 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.761495 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.761507 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.761551 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
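[Editor's note] The nStride/cStride/hStride/wStride and size_in_bytes values that recur throughout this trace are fully determined by the NCHW shapes the runtime logs next to them. A minimal sketch (ours, not the HPVM runtime source; names are hypothetical) that reproduces the logged numbers for the fp32 tensors in this trace:

```cpp
// Sketch: dense NCHW strides and allocation size. Each stride is the
// product of the faster-varying dimensions; bytes = N*C*H*W*sizeof(float).
#include <cstddef>
#include <cstdio>

struct Dims { size_t n, c, h, w; };

void describe(Dims d) {
    size_t wStride = 1;
    size_t hStride = d.w;
    size_t cStride = d.h * d.w;
    size_t nStride = d.c * d.h * d.w;
    size_t bytes   = d.n * nStride * sizeof(float);  // fp32 elements
    std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, "
                "size_in_bytes = %zu\n",
                nStride, cStride, hStride, wStride, bytes);
}

int main() {
    describe({500, 3, 32, 32});   // input batch:   3072/1024/32/1, 6144000 bytes
    describe({500, 64, 32, 32});  // conv1 output:  65536/1024/32/1, 131072000 bytes
    describe({500, 64, 16, 16});  // after 2x2 pool: 16384/256/16/1, 32768000 bytes
}
```

These three cases match the allocations logged immediately below (6144000, 131072000, and 32768000 bytes respectively).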
-INFO: current iteration time = 101.913287, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.799110 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.800034 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.800049 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.800920 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000870 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.800938 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.803831 -INFO: TimeDuration, Event = Pool_end, Time = 0.002894 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.834253 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.834979 -INFO: TimeDuration, Event = Add_end, Time = 0.000725 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.834992 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.835642 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000650 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.835654 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.838834 -INFO: TimeDuration, Event = Pool_end, Time = 0.003179 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.856591 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.857058 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.857071 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.857406 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.870213 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.870523 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.870536 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.870767 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.886331 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.886638 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.886652 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.886884 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.886901 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.889625 -INFO: TimeDuration, Event = Pool_end, Time = 0.002724 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352767.889645 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352767.889745 -INFO: TimeDuration, Event = Mul_end, Time = 0.000100 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.889775 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.889796 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352767.889811 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352767.889870 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000059 
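[Editor's note] Each batch iteration ends as below: the 20000-byte softmax output (500 x 10 fp32 values) is moved to the host and scored against the labels, producing the "****** Accuracy" line. A minimal top-1 scoring sketch under that assumption (hypothetical helper, not the benchmark's code):

```cpp
// Sketch: top-1 accuracy over a batch_dim x num_classes probability
// matrix in row-major order, as copied back from the GPU.
#include <vector>

float top1_accuracy(const std::vector<float>& probs,  // batch_dim * num_classes
                    const std::vector<int>& labels,   // batch_dim entries
                    int batch_dim, int num_classes) {
    int correct = 0;
    for (int i = 0; i < batch_dim; ++i) {
        int best = 0;  // argmax over the row for sample i
        for (int c = 1; c < num_classes; ++c)
            if (probs[i * num_classes + c] > probs[i * num_classes + best])
                best = c;
        if (best == labels[i]) ++correct;
    }
    return 100.0f * correct / batch_dim;  // e.g. 390/500 correct -> 78.000000
}
```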
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 100.244905, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.929952 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.930874 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.930889 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.931752 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.931767 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.934670 -INFO: TimeDuration, Event = Pool_end, Time = 0.002904 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.961862 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.962582 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.962594 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.963243 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352767.963254 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352767.966451 -INFO: TimeDuration, Event = Pool_end, Time = 0.003197 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.984216 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.987707 -INFO: TimeDuration, Event = Add_end, Time = 0.003491 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.987731 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.988084 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000353 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352767.997952 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352767.998262 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000311 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352767.998276 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352767.998504 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.012683 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.012985 -INFO: TimeDuration, Event = Add_end, Time = 0.000302 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.012997 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.013229 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.013244 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.015981 -INFO: TimeDuration, Event = Pool_end, Time = 0.002737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352768.016000 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352768.016105 -INFO: TimeDuration, Event = Mul_end, Time = 0.000105 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.016118 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.016140 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
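[Editor's note] The FCLayer block that follows multiplies the flattened 500 x 4096 pooled activations by a 4096 x 10 weight matrix (the logged m = 500, n = 10, k = 4096) via CuBlasSgemm, then adds the 10-element bias. A plain CPU reference of the same computation, with the bias add folded in for brevity (a sketch, not the runtime's cuBLAS path; row-major layouts assumed):

```cpp
// Sketch: out = in * W + bias for the final fully-connected layer.
#include <vector>

void fc_layer(const std::vector<float>& in,    // m x k, pooled activations
              const std::vector<float>& W,     // k x n, weights
              const std::vector<float>& bias,  // n, per-class bias
              std::vector<float>& out,         // m x n, logits
              int m, int n, int k) {
    for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
            float acc = bias[j];               // TensorAdd fused here for brevity
            for (int p = 0; p < k; ++p)
                acc += in[i * k + p] * W[p * n + j];
            out[i * n + j] = acc;
        }
}
// fc_layer(in, W, bias, out, 500, 10, 4096) yields the 500 x 10 result:
// the 20000-byte tensor later moved to the host for the softmax/accuracy step.
```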
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352768.016153 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352768.016204 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.082270, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.053755 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.054680 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.054696 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.055563 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000867 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.055576 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.058475 -INFO: TimeDuration, Event = Pool_end, Time = 0.002899 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.085651 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.086372 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.086384 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.087031 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.087042 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.090241 -INFO: TimeDuration, Event = Pool_end, Time = 0.003200 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.113898 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.114370 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.114384 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.114719 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352768.127342 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.127651 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.127663 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.127899 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000236 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.143432 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.143738 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.143751 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.143981 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.143998 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.146727 -INFO: TimeDuration, Event = Pool_end, Time = 0.002729 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352768.146747 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352768.146845 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.146858 -DEBUG: No data movement required - Data on Device 
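[Editor's note] The TensorAdd events throughout this trace always pair a large activation tensor with a tiny per-channel bias (e.g. x->num_elems = 8192000 against bias->num_elems = 256 above, or 5000 against 10 in the FC layer here). A sketch of that broadcast, assuming standard NCHW per-channel bias semantics (ours, not HPVM source):

```cpp
// Sketch: add bias[c] to every N*H*W position of channel c.
#include <cstddef>
#include <vector>

void add_bias_nchw(std::vector<float>& x, const std::vector<float>& bias,
                   int n, int c, int h, int w) {
    for (int in = 0; in < n; ++in)
        for (int ic = 0; ic < c; ++ic) {
            float b = bias[ic];
            float* plane = &x[(size_t)(in * c + ic) * h * w];
            for (int i = 0; i < h * w; ++i)
                plane[i] += b;                 // broadcast over the HxW plane
        }
}
// For the conv output above: n=500, c=256, h=8, w=8 gives 8192000 elements
// and 256 biases; the FC case degenerates to h=w=1 with n=500, c=10.
```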
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.146879 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352768.146893 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352768.146935 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 103.064785, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.189964 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.190891 -INFO: TimeDuration, Event = Add_end, Time = 0.000927 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.190906 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.191772 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000866 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.191786 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.196797 -INFO: TimeDuration, Event = Pool_end, Time = 0.005011 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.221904 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.222627 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.222640 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.223289 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.223300 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.226492 -INFO: TimeDuration, Event = Pool_end, Time = 0.003192 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.244271 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.244736 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.244751 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.245093 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000342 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.257741 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.258049 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.258061 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.258290 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.273960 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.274279 -INFO: TimeDuration, Event = Add_end, Time = 0.000318 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.274292 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.274526 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.274543 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.277267 -INFO: TimeDuration, Event = Pool_end, Time = 0.002723 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352768.277288 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352768.277408 -INFO: TimeDuration, Event = Mul_end, Time = 0.000120 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.277438 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.277463 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352768.277478 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352768.277526 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000049 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 97.415656, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.322233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.323190 -INFO: TimeDuration, Event = Add_end, Time = 0.000957 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.323218 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.324095 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000877 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.324120 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.326994 -INFO: TimeDuration, Event = Pool_end, Time = 0.002874 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.354302 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.355059 -INFO: TimeDuration, Event = Add_end, Time = 0.000757 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.355073 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.355740 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000666 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352768.355752 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352768.358899 -INFO: TimeDuration, Event = Pool_end, Time = 0.003148 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352768.376669 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352768.377134 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352768.377148 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352768.377481 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2
-[per-batch FP32 baseline trace elided; the same sequence repeats for every 500-image batch: five ConvLayer blocks (conv1, 64 ch, pool 32x32 -> 16x16; conv2, 192 ch, pool 16x16 -> 8x8; conv3, 384 ch; conv4, 256 ch; conv5, 256 ch, pool 8x8 -> 4x4), each with tensor allocations, strides, and TensorAdd/TensorTanh/TensorPooling timings, followed by an FCLayer (TensorGemmGPU with m = 500, n = 10, k = 4096, then TensorAdd) and TensorSoftmax moving 20000 bytes from GPU to host]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 76.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 96.074970, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-[per-batch trace and repeated findNextConfiguration/findTargetConfiguration lines elided]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-INFO: current iteration time = 95.454600, current iteration energy = 0.000000
-[per-batch trace elided]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 81.000000
-INFO: current iteration time = 98.670549, current iteration energy = 0.000000
-[per-batch trace elided]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.800003
-INFO: current iteration time = 95.390191, current iteration energy = 0.000000
-[per-batch trace elided]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.000000
-INFO: current iteration time = 104.958454, current iteration energy = 0.000000
-[per-batch trace elided]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 82.800003
-INFO: current iteration time = 96.661260, current iteration energy = 0.000000
-[next per-batch trace elided up to the FCLayer TensorGemmGPU tensor allocation]
-DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352769.195189 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.195201 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.195225 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352769.195240 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352769.195287 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000048 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.325373, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.242850 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.243813 -INFO: TimeDuration, Event = Add_end, Time = 0.000963 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.243830 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.244692 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000862 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352769.244710 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
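Throughout the trace above, the stride and size lines are fully determined by the dense NCHW FP32 layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N*C*H*W*4. For instance, the 98304000-byte conv output corresponds to N = 500, C = 192, H = W = 16 (500*192*16*16*4 = 98304000, with nStride = 49152 and cStride = 256). The C++ sketch below reproduces these numbers; the helper names are hypothetical and are not part of the HPVM runtime.

#include <cstddef>
#include <cstdio>

// Hypothetical helpers (not HPVM runtime code): strides and byte size of a
// dense NCHW float32 tensor, matching the nStride/cStride/hStride/wStride
// and size_in_bytes lines in the trace.
struct NCHWStrides { size_t n, c, h, w; };

static NCHWStrides nchwStrides(size_t C, size_t H, size_t W) {
  return { C * H * W, H * W, W, 1 };  // wStride is always 1 for dense NCHW
}

static size_t sizeInBytesFP32(size_t N, size_t C, size_t H, size_t W) {
  return N * C * H * W * sizeof(float);  // FP32: 4 bytes per element
}

int main() {
  // Shape logged for the conv output above: N = 500, C = 192, H = W = 16.
  NCHWStrides s = nchwStrides(192, 16, 16);
  std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
              s.n, s.c, s.h, s.w);                 // 49152, 256, 16, 1
  std::printf("size_in_bytes = %zu\n",
              sizeInBytesFP32(500, 192, 16, 16));  // 98304000
  return 0;
}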
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO trace for this batch elided: identical in structure and values to the batch reconstructed above, only the AbsoluteTime/TimeDuration stamps differ ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 80.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 99.496038, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO trace for this batch elided: identical in structure and values to the batch reconstructed above, only the AbsoluteTime/TimeDuration stamps differ ...]
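The FCLayer entries record a GEMM with m = 500, n = 10, k = 4096: each of the 500 images contributes a flattened 256*4*4 = 4096-element feature vector from the last pooling stage, which is multiplied by a 4096x10 weight matrix and offset by a 10-element bias (x->num_elems = 5000 and the 20000-byte output are 500*10 floats). The runtime dispatches this to cuBLAS (the "CuBlasSgemm" line); the plain C++ reference below is only a sketch under assumed row-major layouts, with hypothetical names.

#include <vector>

// Sketch of the FC layer as GEMM + bias add (assumed row-major layouts;
// hypothetical function, not the runtime's cuBLAS-backed implementation).
static void fcLayerRef(const std::vector<float>& x,  // m x k inputs
                       const std::vector<float>& w,  // k x n weights
                       const std::vector<float>& b,  // n biases
                       std::vector<float>& out,      // m x n outputs
                       int m, int n, int k) {
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      float acc = b[j];                // bias->num_elems = n = 10
      for (int p = 0; p < k; ++p)
        acc += x[i * k + p] * w[p * n + j];
      out[i * n + j] = acc;            // 500 * 10 floats = 20000 bytes
    }
  }
}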
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 79.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.411006, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO trace for this batch elided: identical in structure and values to the batch reconstructed above, only the AbsoluteTime/TimeDuration stamps differ ...]
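Each TensorPooling entry logs the output shape (n, c, h, w) together with the input spatial extent (dim1, dim2), and in every case h = dim1/2 and w = dim2/2, consistent with a 2x2, stride-2 max pool. The sketch below assumes exactly that geometry (dense NCHW, no padding); the function name is hypothetical and this is not the runtime's actual GPU kernel.

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch of the 2x2 / stride-2 max pooling implied by the logged shapes
// (e.g. dim1 = dim2 = 16 in, h = w = 8 out).
static std::vector<float> maxPool2x2(const std::vector<float>& in,
                                     int N, int C, int H, int W) {
  const int oh = H / 2, ow = W / 2;
  std::vector<float> out((size_t)N * C * oh * ow);
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int y = 0; y < oh; ++y)
        for (int x = 0; x < ow; ++x) {
          auto at = [&](int yy, int xx) {
            return in[(((size_t)n * C + c) * H + yy) * W + xx];
          };
          // Maximum over the 2x2 input window feeding output cell (y, x).
          out[(((size_t)n * C + c) * oh + y) * ow + x] =
              std::max(std::max(at(2 * y, 2 * x),     at(2 * y, 2 * x + 1)),
                       std::max(at(2 * y + 1, 2 * x), at(2 * y + 1, 2 * x + 1)));
        }
  return out;
}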
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 78.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 95.457044, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO trace for this batch elided: identical in structure and values to the batch reconstructed above, only the AbsoluteTime/TimeDuration stamps differ ...]
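After the softmax, the 20000-byte result (500 images x 10 class probabilities, FP32) is copied back to the host, and the "****** Accuracy" lines report the percentage of images whose highest-probability class matches the ground-truth label; 78.000000, for example, corresponds to 390 of 500 correct. A hypothetical sketch of that final step (assumed argmax-vs-label semantics, not the runtime's actual code):

#include <cstdint>
#include <vector>

// Batch accuracy as implied by "batch_dim = 500, num_classes = 10".
static float batchAccuracy(const std::vector<float>& probs,         // batch x classes
                           const std::vector<std::uint8_t>& labels, // batch
                           int batch, int classes) {
  int correct = 0;
  for (int i = 0; i < batch; ++i) {
    int best = 0;  // argmax over this image's class probabilities
    for (int c = 1; c < classes; ++c)
      if (probs[i * classes + c] > probs[i * classes + best]) best = c;
    if (best == labels[i]) ++correct;
  }
  return 100.0f * correct / batch;  // e.g. 390 correct / 500 -> 78.000000
}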
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 76.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 98.011342, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO trace for this batch elided: identical in structure and values to the batch reconstructed above, only the AbsoluteTime/TimeDuration stamps differ ...]
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352769.831692 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.831705 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.831726 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352769.831739 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352769.831782 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 96.578383, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.869188 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.870113 -INFO: TimeDuration, Event = Add_end, Time = 0.000925 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.870128 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.870989 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352769.871002 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352769.873909 -INFO: TimeDuration, Event = Pool_end, Time = 0.002907 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.901074 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.901793 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.901807 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.902456 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352769.902466 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352769.905659 -INFO: TimeDuration, Event = Pool_end, Time = 0.003192 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.923419 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.923886 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.923900 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.924233 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.936890 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.937198 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.937210 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.937441 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.951583 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.951887 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352769.951898 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352769.952133 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352769.952149 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352769.954876 -INFO: TimeDuration, Event = Pool_end, Time = 0.002727 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352769.954896 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352769.954994 -INFO: TimeDuration, Event = Mul_end, Time = 0.000098 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352769.955007 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352769.955029 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352769.955042 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352769.955084 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000042 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 81.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.446189, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.000905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.001876 -INFO: TimeDuration, Event = Add_end, Time = 0.000971 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.001894 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.002755 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000861 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.002768 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.005607 -INFO: TimeDuration, Event = Pool_end, Time = 0.002839 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.032839 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.033557 -INFO: TimeDuration, Event = Add_end, Time = 0.000719 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.033570 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.034217 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000647 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.034229 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.037427 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.056246 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.056713 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.056728 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352770.057065 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000337 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.069725 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.070034 -INFO: TimeDuration, Event = Add_end, Time = 0.000309 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.070046 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.070276 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.084454 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.084758 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.084770 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.085003 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.085018 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.087748 -INFO: TimeDuration, Event = Pool_end, Time = 0.002729 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.087767 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.087863 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.087876 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.087897 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.087909 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.087977 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000068 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
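
Each "****** Accuracy" line above is plain top-1 accuracy over the 500-image batch: take the argmax of the 10 softmax outputs in each row, compare it with the reference label, and report the fraction correct as a percentage. A minimal self-contained sketch of that check, assuming row-major batch x classes output and hypothetical names (the checker itself does not appear in this trace):

    #include <cstdio>
    #include <vector>

    // Hypothetical re-implementation of the accuracy check implied by
    // "batch_dim = 500, num_classes = 10, ****** Accuracy = ...".
    float batchAccuracy(const std::vector<float> &probs,  // batch x classes
                        const std::vector<int> &labels, int batch, int classes) {
      int correct = 0;
      for (int i = 0; i < batch; ++i) {
        int best = 0;                          // argmax over the class scores
        for (int c = 1; c < classes; ++c)
          if (probs[i * classes + c] > probs[i * classes + best]) best = c;
        if (best == labels[i]) ++correct;
      }
      return 100.0f * correct / batch;         // percentage, as in the trace
    }

    int main() {
      const int batch = 500, classes = 10;             // as logged above
      std::vector<float> probs(batch * classes, 0.1f); // placeholder outputs
      std::vector<int> labels(batch, 0);               // placeholder labels
      std::printf("****** Accuracy = %f\n",
                  batchAccuracy(probs, labels, batch, classes));
    }
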
-INFO: current iteration time = 105.011521, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.127749 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.128670 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.128688 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.129552 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.129565 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.132471 -INFO: TimeDuration, Event = Pool_end, Time = 0.002906 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.159697 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.160415 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.160426 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.161071 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000645 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.161083 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.164291 -INFO: TimeDuration, Event = Pool_end, Time = 0.003208 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.182060 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.182526 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.182538 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.182873 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.195527 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.195834 -INFO: TimeDuration, Event = Add_end, Time = 0.000307 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.195847 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.196075 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.210259 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.210564 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.210577 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.210805 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.210821 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.213555 -INFO: TimeDuration, Event = Pool_end, Time = 0.002733 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.213575 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.213672 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.213685 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.213706 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.213720 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.213763 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 
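
The AbsoluteTime/TimeDuration pairs that bracket every operator in this trace are wall-clock stamps: one stamp when the op is entered, one when it returns, with the reported duration being their difference (e.g. Softmax at ...213720 and Softmax_end at ...213763 give the logged ~0.000044 s). A rough sketch of that bracketing pattern, an assumption about the profiler rather than its real code:

    #include <chrono>
    #include <cstdio>

    // Hypothetical shim mimicking the AbsoluteTime/TimeDuration trace lines:
    // seconds since the epoch, stamped on entry and exit of an operator.
    static double nowSeconds() {
      using namespace std::chrono;
      return duration<double>(system_clock::now().time_since_epoch()).count();
    }

    int main() {
      double start = nowSeconds();
      std::printf("INFO: AbsoluteTime, Event = Softmax, Time = %f\n", start);
      // ... run the operator and wait for it to finish here ...
      double end = nowSeconds();
      std::printf("INFO: AbsoluteTime, Event = Softmax_end, Time = %f\n", end);
      std::printf("INFO: TimeDuration, Event = Softmax_end, Time = %f\n",
                  end - start);
    }
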
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 97.443060, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.251749 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.252663 -INFO: TimeDuration, Event = Add_end, Time = 0.000914 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.252681 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.253541 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000859 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.253555 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.256464 -INFO: TimeDuration, Event = Pool_end, Time = 0.002910 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.283743 -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.284466 -INFO: TimeDuration, Event = Add_end, Time = 0.000722 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.284476 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.285128 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000652 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.285141 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.288351 -INFO: TimeDuration, Event = Pool_end, Time = 0.003209 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.306106 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.306572 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.306585 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.306918 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000333 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.319545 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.319854 -INFO: TimeDuration, Event = 
Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.319865 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.320096 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.334275 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.334579 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.334591 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.334823 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.334839 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.337569 -INFO: TimeDuration, Event = Pool_end, Time = 0.002730 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.337589 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.337687 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.337699 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.337720 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device 
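
The FCLayer block above runs as a single SGEMM with m = 500 (batch), n = 10 (classes), and k = 4096 (the flattened 256 x 4 x 4 pooled activation), then broadcasts the 10-element bias across rows; that is why the result holds 5000 fp32 values = 20000 bytes, exactly as allocated. A CPU reference of those shapes, for illustration only (the runtime itself dispatches to CuBlasSgemm):

    #include <cstdio>
    #include <vector>

    int main() {
      const int m = 500, n = 10, k = 4096;   // batch, classes, 256*4*4
      std::vector<float> x(m * k, 0.01f);    // pooled activations, row-major
      std::vector<float> w(k * n, 0.02f);    // FC weights
      std::vector<float> bias(n, 0.1f);      // bias->num_elems = 10
      std::vector<float> out(m * n);         // x->num_elems = 5000

      for (int i = 0; i < m; ++i)            // out = x * w + bias
        for (int j = 0; j < n; ++j) {
          float acc = bias[j];
          for (int p = 0; p < k; ++p)
            acc += x[i * k + p] * w[p * n + j];
          out[i * n + j] = acc;
        }
      std::printf("m = %d, n = %d, k = %d, out bytes = %zu\n", m, n, k,
                  out.size() * sizeof(float)); // 20000, matching the trace
    }
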
-INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.337733 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.337777 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 82.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.444106, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.375452 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.376404 -INFO: TimeDuration, Event = Add_end, Time = 0.000952 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.376418 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.377275 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000857 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.377290 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.380174 -INFO: TimeDuration, Event = Pool_end, Time = 0.002884 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting 
to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.407452 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.408175 -INFO: TimeDuration, Event = Add_end, Time = 0.000723 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.408189 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.408840 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000651 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.408854 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.412042 -INFO: TimeDuration, Event = Pool_end, Time = 0.003188 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.429845 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.430311 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.430324 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.430658 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: 
AbsoluteTime, Event = Add, Time = 1607352770.443332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.443640 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.443652 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.443884 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.458068 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.458374 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.458386 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.458617 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.458633 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.461360 -INFO: TimeDuration, Event = Pool_end, Time = 0.002726 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.461379 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.461475 -INFO: TimeDuration, Event = Mul_end, Time = 0.000096 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.461488 -DEBUG: No data movement required - Data on Device 
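
The pool entry above (dim1 = 8, dim2 = 8 in, h = 4, w = 4 out) is consistent with a 2 x 2 window at stride 2, the same reduction seen at 32 -> 16 and 16 -> 8 earlier in the trace; window and stride are inferred from the logged sizes, not stated in them. The output shape then fixes the logged strides and byte count, since for an NCHW fp32 tensor wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size = N*C*H*W*4. A small sketch of that arithmetic under those assumptions:

    #include <cstddef>
    #include <cstdio>

    int main() {
      const int N = 500, C = 256, Hin = 8, Win = 8; // dim1, dim2 in the trace
      const int window = 2, stride = 2;             // inferred 2x2, stride-2 pool
      const int Hout = (Hin - window) / stride + 1; // -> 4
      const int Wout = (Win - window) / stride + 1; // -> 4
      std::printf("n = %d, c = %d, h = %d, w = %d\n", N, C, Hout, Wout);
      std::printf("nStride = %d, cStride = %d, hStride = %d, wStride = 1\n",
                  C * Hout * Wout, Hout * Wout, Wout); // 4096, 16, 4
      std::printf("size_in_bytes = %zu\n",            // 8192000, as allocated
                  (std::size_t)N * C * Hout * Wout * sizeof(float));
    }
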
-DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.461510 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.461524 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.461569 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.705714, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.499318 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.500240 -INFO: TimeDuration, Event = Add_end, Time = 0.000922 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.500256 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.501119 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000863 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.501134 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.504039 -INFO: TimeDuration, Event = Pool_end, Time = 0.002904 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data 
on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.531316 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.532037 -INFO: TimeDuration, Event = Add_end, Time = 0.000721 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.532052 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.532700 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.532714 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.535907 -INFO: TimeDuration, Event = Pool_end, Time = 0.003194 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.553673 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.554137 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.554149 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.554483 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000334 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride 
= 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.567153 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.567462 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.567474 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.567703 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.581874 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.582179 -INFO: TimeDuration, Event = Add_end, Time = 0.000305 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.582191 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.582422 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.582439 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.585170 -INFO: TimeDuration, Event = Pool_end, Time = 0.002731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.585189 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.585287 -INFO: TimeDuration, Event = Mul_end, Time = 0.000097 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.585299 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.585320 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.585334 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.585377 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 80.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.643167, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.622747 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.623672 -INFO: TimeDuration, Event = Add_end, Time = 0.000924 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.623686 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.624552 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.624597 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.627464 -INFO: TimeDuration, Event = Pool_end, Time = 0.002867 -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.654707 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.655427 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.655440 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.656085 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000646 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.656098 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.659297 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.680789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.681257 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.681270 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.681606 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - 
BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.694441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.694748 -INFO: TimeDuration, Event = Add_end, Time = 0.000306 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.694760 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.694989 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.709224 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.709528 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.709541 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.709770 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.709788 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.712519 -INFO: TimeDuration, Event = Pool_end, Time = 0.002731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.712570 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.712669 -INFO: TimeDuration, Event = Mul_end, Time = 0.000099 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.712682 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.712703 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.712717 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.712766 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 79.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 99.519238, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.755507 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.756481 -INFO: TimeDuration, Event = Add_end, Time = 0.000974 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.756616 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.757480 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000865 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.757493 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 
32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.761279 -INFO: TimeDuration, Event = Pool_end, Time = 0.003786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.787415 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.788135 -INFO: TimeDuration, Event = Add_end, Time = 0.000720 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.788146 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.788795 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000649 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.788809 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.792007 -INFO: TimeDuration, Event = Pool_end, Time = 0.003198 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.809779 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.810245 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.810274 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 
1607352770.810606 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000332 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.823273 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.823581 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.823594 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.823822 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.837996 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.838299 -INFO: TimeDuration, Event = Add_end, Time = 0.000303 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.838313 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.838543 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.838559 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.841293 -INFO: TimeDuration, Event = Pool_end, Time = 0.002734 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data 
movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.841312 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.841414 -INFO: TimeDuration, Event = Mul_end, Time = 0.000102 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.841428 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.841450 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.841464 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.841506 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 78.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 99.043276, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 131072000 -DEBUG: Attempting to Allocate = 131072000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 65536, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.878670 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 32768000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.879589 -INFO: TimeDuration, Event = Add_end, Time = 0.000920 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.879604 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.880592 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000987 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.880609 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.884442 -INFO: TimeDuration, Event = Pool_end, Time = 0.003834 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 98304000 -DEBUG: Attempting to Allocate = 98304000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 49152, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.910615 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 24576000 -INFO: bias->num_elems = 192 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.911333 -INFO: TimeDuration, Event = Add_end, Time = 0.000718 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.911347 -DEBUG: No data 
movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.911995 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000648 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.912006 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 192, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 24576000 -DEBUG: Attempting to Allocate = 24576000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 12288, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.915206 -INFO: TimeDuration, Event = Pool_end, Time = 0.003199 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 49152000 -DEBUG: Attempting to Allocate = 49152000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 24576, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.933003 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 12288000 -INFO: bias->num_elems = 384 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.933467 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.933479 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.933814 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000335 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.946466 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.946774 -INFO: TimeDuration, Event = Add_end, Time = 0.000308 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.946785 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.947016 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - 
Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.961226 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 256 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.961531 -INFO: TimeDuration, Event = Add_end, Time = 0.000304 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352770.961543 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352770.961772 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352770.961787 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 256, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352770.964537 -INFO: TimeDuration, Event = Pool_end, Time = 0.002750 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352770.964678 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 4096 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352770.964797 -INFO: TimeDuration, Event = Mul_end, Time = 0.000119 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352770.964812 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352770.964851 -INFO: TimeDuration, Event = Add_end, Time = 0.000039 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352770.964865 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352770.964910 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 
-DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 76.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: 0.000000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 95.737806, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -Exiting profiler -INFO: Writing Runtime Profile Info File... -INFO: Done writing profile. diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt deleted file mode 100644 index a6d177c90d5a2890afa5387d4c2a50de1cb6c852..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs.txt +++ /dev/null @@ -1,11 +0,0 @@ -2000 -+++++ -conf1 3.86 0 79.1 0.0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs_base.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs_base.txt deleted file mode 100644 index c3bc2335227cf06169b1f3d105314fdc9647d97d..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/tuner_confs_base.txt +++ /dev/null @@ -1,20 +0,0 @@ -+++++ -conf1 1 0 79.9 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf2 1.5 0 79.9 0 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 1 add fp16 1 -7 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/predictive/alexnet.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/predictive/alexnet.txt deleted file mode 100644 index a9ccba6eb63f620c0e3b6f95fd7c50892018f00f..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/predictive/alexnet.txt +++ /dev/null @@ -1,511 +0,0 @@ -2592.187221 -+++++ -conf1 1 1 79.28 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 -7 gpu softmax fp32 1 ------ -+++++ -conf2 1.7593976485873195 1.6193399031642917 79.23 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf3 2.092625440752526 1.9139078015388271 78.96 0.3200000000000074 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 
pool_max fp16 1 -2 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf4 1.8870195448805414 1.7296919053025768 78.8 0.480000000000004 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf5 2.1184804041774554 1.9598989563949536 78.75999999999999 0.5200000000000102 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf6 2.1184804041774554 1.9598989563949536 78.75999999999999 0.5200000000000102 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf7 2.0933825381386364 1.9150743378318535 78.64 0.6400000000000006 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf8 2.081712090729918 1.9102226906341664 78.5 0.7800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf9 2.081712090729918 1.9102226906341664 78.5 0.7800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf10 2.2662606588487595 2.066560750795139 78.48 0.7999999999999972 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf11 2.121684761285686 1.966318179285323 78.48 0.7999999999999972 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu 
softmax fp32 1 ------ -+++++ -conf12 2.3417491169395532 2.1355030360671465 78.38000000000001 0.8999999999999915 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf13 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf14 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf15 2.2247938983110425 2.060416584958474 78.38000000000001 0.8999999999999915 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf16 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf17 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf18 2.2627828537139263 2.065683616898884 78.32000000000001 0.9599999999999937 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf19 2.146571989407323 1.95711703610764 78.18 1.0999999999999943 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf20 2.303316973793268 2.1036463961913276 78.10000000000001 1.1799999999999926 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 
-2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf21 2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf22 2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf23 2.436875653706139 2.2434837737118056 78.08 1.2000000000000028 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf24 2.1106508925330925 1.9419233584234938 78.06 1.2199999999999989 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf25 2.3203534290038634 2.116965679235447 78.06 1.2199999999999989 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf26 2.3527290658539215 2.145832257234814 78.03999999999999 1.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf27 2.3527290658539215 2.145832257234814 78.03999999999999 1.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf28 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 
1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf29 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf30 2.432854949808342 2.2424500615508003 78.0 1.2800000000000011 -1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf31 2.3137982135449207 2.1281257317083417 77.84 1.4399999999999977 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf32 2.1198074418988333 1.9522214255218437 77.82 1.460000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf33 2.246924974355375 2.065289762405701 77.8 1.480000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf34 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf35 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf36 2.263614734554485 2.090777846534249 77.74 1.5400000000000063 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf37 2.5289288699015304 2.334007588396142 77.72 1.5600000000000023 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 
add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf38 2.5289288699015304 2.334007588396142 77.72 1.5600000000000023 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf39 2.3117594882585775 2.1152397180868943 77.56 1.7199999999999989 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf40 2.452732477854469 2.264573687601476 77.56 1.7199999999999989 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf41 2.452732477854469 2.264573687601476 77.56 1.7199999999999989 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf42 2.382518688546389 2.178614303992064 77.5 1.7800000000000011 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf43 2.382518688546389 2.178614303992064 77.5 1.7800000000000011 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf44 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf45 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 
add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf46 2.3900667100485924 2.188128526401265 77.48 1.7999999999999972 -1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf47 2.4835281673276515 2.279527076032239 77.3 1.980000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf48 2.4835281673276515 2.279527076032239 77.3 1.980000000000004 -1 gpu conv samp_fp16 264 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf49 2.1553694968551302 1.9959124044028933 77.18 2.0999999999999943 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 265 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf50 2.5877520959724816 2.3763616521050364 77.03999999999999 2.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ -+++++ -conf51 2.5877520959724816 2.3763616521050364 77.03999999999999 2.240000000000009 -1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 -7 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt deleted file mode 100644 index 50e026a7bfca4b66049080501b3abb299e882bd2..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_confs_batch220.txt +++ /dev/null @@ -1,1771 +0,0 @@ -+++++ -conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf1 1.9038241963 0 84.979996 0.18000400000000527 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf2 1.64842307895 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf3 1.72558649193 0 84.500008 0.6599920000000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf4 1.82944424391 0 84.279999 0.7200015000000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf5 1.82944424391 0 84.199997 0.8400045000000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf6 1.64842307895 0 84.139999 0.930001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf7 1.72558649193 0 84.919998 0.24000199999999838 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf8 1.79160865678 0 85.259995 -0.09999499999999839 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf9 1.82944424391 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf10 1.70856970404 0 85.300003 -0.14000299999999866 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf11 1.64842307895 0 84.12001 0.9599850000000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf12 1.82944424391 0 84.339996 0.6300060000000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf13 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf14 1.64842307895 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf15 1.9038241963 0 84.900002 0.2599980000000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf16 1.82944424391 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf17 1.74294565288 0 84.139999 0.930001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf18 1.88313156795 0 84.159996 0.9000059999999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu 
conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf19 1.66425732228 0 85.220001 -0.0600009999999912 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf20 1.63288729942 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf21 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf22 1.64842307895 0 84.279999 0.7200015000000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf23 1.72558649193 0 84.119995 0.9600075000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 31 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf24 1.72558649193 0 85.080002 0.07999800000001189 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf25 1.82944424391 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf26 1.79160865678 0 84.220001 0.8099985000000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 
1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf27 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf28 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf29 1.70856970404 0 85.479996 -0.31999599999999473 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf30 1.72558649193 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf31 1.79160865678 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf32 1.9038241963 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf33 1.82944424391 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf34 1.64842307895 0 85.280006 -0.12000599999999506 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf35 1.73481954522 0 83.559998 1.800003000000018 -1 gpu conv perf 
30 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf36 1.70856970404 0 83.800003 1.439995500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf37 1.81032878247 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf38 1.72558649193 0 83.619995 1.7100075000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf39 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf40 1.72558649193 0 85.020004 0.139996000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf41 1.80156379054 0 83.720009 1.5599865000000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf42 1.79160865678 0 83.5 1.8900000000000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf43 1.67284410055 0 84.040001 1.079998500000002 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf44 1.73481954522 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf45 1.79160865678 0 84.319992 0.6600120000000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf46 1.61991030088 0 83.580002 1.7699970000000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv perf 25 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf47 1.61991030088 0 83.860001 1.3499985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv perf 28 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf48 1.79160865678 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf49 1.71762107501 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf50 1.72558649193 0 84.720001 0.4399990000000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf51 1.82049363128 0 83.879997 1.3200045000000031 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf52 1.82049363128 0 84.12001 
0.9599850000000174 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf53 1.72558649193 0 83.860001 1.3499985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf54 1.73481954522 0 85.419998 -0.2599980000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf55 1.7523659141 0 84.759995 0.4000050000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf56 1.65684682663 0 84.800003 0.35999700000000134 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf57 1.88313156795 0 83.720001 1.5599985000000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf58 1.82049363128 0 84.060005 1.0499925000000019 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf59 1.82049363128 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf60 1.7523659141 0 84.379997 0.5700045000000031 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 
add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf61 1.82049363128 0 84.120003 0.9599955000000122 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf62 1.70856970404 0 83.900002 1.2899970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf63 1.79160865678 0 83.559998 1.800003000000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf64 1.71762107501 0 85.040001 0.11999900000000141 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf65 1.81032878247 0 83.740005 1.529992500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf66 1.73481954522 0 84.719994 0.44000600000000534 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf67 1.70856970404 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf68 1.72558649193 0 84.18 0.8699999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ 
-+++++ -conf69 1.61991030088 0 83.480003 1.919995500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv perf 25 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf70 1.82049363128 0 83.860001 1.3499985000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf71 1.58558171041 0 85.639999 -0.47999899999999796 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf72 1.88313156795 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf73 1.63288729942 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf74 1.73481954522 0 83.680008 1.6199880000000064 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf75 1.81032878247 0 83.939995 1.2300075000000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf76 1.67284410055 0 83.639999 1.680001500000003 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf77 1.7523659141 0 83.599998 1.7400030000000086 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 
1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf78 1.61991030088 0 84.119995 0.9600075000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv perf 23 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf79 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf80 1.71762107501 0 84.860001 0.29999900000000823 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf81 1.81032878247 0 84.080002 1.0199970000000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf82 1.67284410055 0 84.680008 0.4799920000000043 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf83 1.82049363128 0 84.199997 0.8400045000000134 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf84 1.82049363128 0 83.959999 1.2000015000000133 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf85 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul 
fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf86 1.61237267544 0 85.340004 -0.18000399999998817 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf87 1.73481954522 0 83.919998 1.2600029999999975 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf88 1.67284410055 0 84.600006 0.5599940000000118 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf89 1.72558649193 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf90 1.66425732228 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf91 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf92 1.82049363128 0 84.440002 0.47999699999999734 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf93 1.71762107501 0 84.360001 0.5999985000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf94 1.81032878247 0 84.240005 0.779992500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf95 1.66425732228 0 83.619995 1.7100075000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf96 1.88313156795 0 83.479996 1.9200060000000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf97 1.77662432349 0 83.620003 1.7099955000000122 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf98 1.64115261583 0 86.300003 -1.1400029999999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf99 1.80156379054 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf100 1.79160865678 0 84.099998 0.9900030000000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf101 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf102 1.7669421638 0 83.280006 2.2199910000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 
28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf103 1.70269284588 0 83.780006 1.4699910000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf104 1.7523659141 0 83.620003 1.7099955000000122 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf105 1.82049363128 0 82.980003 2.669995500000013 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf106 1.80156379054 0 83.259995 2.2500075000000024 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf107 1.65684682663 0 83.300003 2.189995500000002 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf108 1.65684682663 0 82.940002 2.7299969999999973 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf109 1.7523659141 0 83.820007 1.4099895000000018 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf110 1.83623037965 0 83.68 1.6199999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf111 1.83623037965 0 83.759995 1.5000075000000024 -1 gpu conv fp16 1 
add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf112 1.83623037965 0 82.959999 2.7000015000000133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf113 1.74910416379 0 83.099991 2.4900135000000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf114 1.82049363128 0 83.440002 1.9799969999999973 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf115 1.7523659141 0 82.800003 2.939995500000002 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf116 1.83623037965 0 83.080002 2.519997000000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf117 1.80156379054 0 83.059998 2.550003000000018 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf118 1.82049363128 0 83.699997 1.5900045000000134 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf119 1.7669421638 0 83.360001 2.0999985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf120 1.7669421638 0 84.219994 0.810009000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf121 1.7523659141 0 83.680008 1.6199880000000064 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf122 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf123 1.93249146701 0 82.860001 2.8499985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf124 1.93249146701 0 83.12001 2.4599850000000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf125 1.65684682663 0 84.68 0.4799999999999983 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf126 1.80156379054 0 83.040001 2.579998500000002 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf127 1.65684682663 0 84.639999 0.520001000000002 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf128 
1.7523659141 0 82.860001 2.8499985000000123 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf129 1.69436712239 0 83.360001 2.0999985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv perf 23 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf130 1.80156379054 0 83.0 2.6400000000000077 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf131 1.7669421638 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf132 1.68612242394 0 83.820007 1.4099895000000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf133 1.70269284588 0 84.099998 0.9900030000000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf134 1.73481954522 0 83.0 2.6400000000000077 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf135 1.82049363128 0 83.840004 1.3799940000000177 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf136 1.7523659141 0 83.580002 1.7699970000000178 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 
-4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf137 1.82049363128 0 83.940002 1.2299969999999973 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf138 1.73481954522 0 83.360001 2.0999985000000123 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf139 1.73481954522 0 84.139999 0.930001500000003 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf140 1.85589972143 0 83.0 2.6400000000000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf141 1.7523659141 0 83.360001 2.0999985000000123 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf142 1.66987141658 0 84.759995 0.4000050000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf143 1.7523659141 0 83.199997 2.3400045000000134 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf144 1.72558649193 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 
1 -8 gpu softmax fp16 1 ------ -+++++ -conf145 1.73481954522 0 83.719994 1.560009000000008 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf146 1.7669421638 0 83.159996 2.4000059999999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf147 1.7669421638 0 83.060005 2.549992500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf148 1.93249146701 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf149 1.73481954522 0 83.68 1.6199999999999974 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf150 1.74910416379 0 83.300003 2.189995500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf151 1.74910416379 0 83.780006 1.4699910000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf152 1.73481954522 0 83.460007 1.949989500000001 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf153 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf154 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf155 1.88313156795 0 83.099998 2.4900030000000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf156 1.82049363128 0 83.400002 2.0399970000000067 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf157 1.81032878247 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf158 1.7669421638 0 83.840004 1.3799940000000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf159 1.71762107501 0 84.699997 0.46000300000000893 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf160 1.93249146701 0 82.979996 2.670006000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt deleted file mode 100644 index 15e34ccf0d6419a0adc7fa8c51376182a9f6b6e5..0000000000000000000000000000000000000000 --- 
a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ /dev/null @@ -1,473 +0,0 @@ -+++++ -conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf1 1.9038241963 0 84.979996 0.18000400000000527 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf2 1.79160865678 0 85.259995 -0.09999499999999839 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf3 1.9038241963 0 84.900002 0.2599980000000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf4 1.88313156795 0 84.159996 0.9000059999999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf5 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf6 1.70856970404 0 85.479996 -0.31999599999999473 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf7 1.9038241963 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf8 1.81032878247 0 84.400002 0.5399970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf9 1.72558649193 0 85.020004 0.139996000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf10 1.73481954522 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf11 1.79160865678 0 84.319992 0.6600120000000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf12 1.71762107501 0 85.540001 -0.3800009999999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf13 1.72558649193 0 84.720001 0.4399990000000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf14 1.73481954522 0 85.419998 -0.2599980000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf15 1.7523659141 0 84.759995 0.4000050000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf16 1.88313156795 0 83.720001 1.5599985000000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 
1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf17 1.82049363128 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf18 1.71762107501 0 85.040001 0.11999900000000141 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf19 1.73481954522 0 84.719994 0.44000600000000534 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf20 1.88313156795 0 83.639999 1.680001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf21 1.63288729942 0 85.800003 -0.6400029999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf22 1.81032878247 0 84.259995 0.7500075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf23 1.71762107501 0 84.860001 0.29999900000000823 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf24 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf25 1.66425732228 0 85.800003 
-0.6400029999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf26 1.74294565288 0 84.559998 0.600002000000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf27 1.82049363128 0 84.440002 0.47999699999999734 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf28 1.81032878247 0 84.240005 0.779992500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf29 1.88313156795 0 83.479996 1.9200060000000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf30 1.64115261583 0 86.300003 -1.1400029999999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf31 1.80156379054 0 84.239998 0.7800030000000078 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf32 1.70856970404 0 85.699997 -0.5399969999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf33 1.83623037965 0 83.68 1.6199999999999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 
1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf34 1.83623037965 0 83.759995 1.5000075000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf35 1.93249146701 0 82.860001 2.8499985000000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf36 1.93249146701 0 83.12001 2.4599850000000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf37 1.72558649193 0 84.82 0.34000000000001196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf38 1.93249146701 0 83.139999 2.430001500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf39 1.88313156795 0 84.120003 0.9599955000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf40 1.88313156795 0 83.099998 2.4900030000000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf41 1.81032878247 0 84.680008 0.4799920000000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ 
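All of the `+++++ ... -----` blocks in these deleted files share one record layout, so a short parser makes the fields explicit. This is a hedged sketch, with field meanings inferred from the data rather than taken from HPVM sources: each block header reads `name speedup <always-zero column> accuracy accuracy_loss`, and every following row is a layer index, a device tag (`gpu` here, or `promise` in the multi file below), and operator/knob tokens such as `conv perf 27` or `swing_level 3`. Note that the loss column is not measured against the printed fp32 `conf1` accuracy of 84.76; within this pareto file the accuracy and loss columns are consistent with a fixed reference near 85.16 (e.g. conf41: 85.16 - 84.680008 = 0.479992, matching its stated loss).

```python
# Hypothetical parser for the "+++++ ... -----" tuner-configuration blocks
# deleted by this patch. Field meanings are inferred from the data shown
# here, not taken from HPVM sources.
from typing import List, NamedTuple

class TunerConf(NamedTuple):
    name: str
    speedup: float
    accuracy: float
    accuracy_loss: float
    layers: List[List[str]]  # per-layer tokens, e.g. ['gpu', 'conv', 'perf', '27', ...]

def parse_tuner_confs(text: str) -> List[TunerConf]:
    confs = []
    for block in text.split("+++++"):
        # Drop blank lines and the "-----" terminator; tokenize the rest.
        rows = [r.split() for r in block.strip().splitlines()
                if r.strip() and not r.startswith("-----")]
        if not rows:
            continue
        name, speedup, _energy, acc, loss = rows[0]  # 5-field header row
        confs.append(TunerConf(name, float(speedup), float(acc), float(loss),
                               [r[1:] for r in rows[1:]]))  # drop the layer index
    return confs
```

Run over the original tuner_pareto_confs_batch220.txt, this would yield 43 records (the fp32 baseline plus the 42 tuned configurations), each with eight layer rows, which matches the 473 deleted lines at 11 lines per block.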
-+++++ -conf42 1.93249146701 0 82.979996 2.670006000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt deleted file mode 100644 index 8a3147cd5c674301228cca2adcc4d88a0ecdfb72..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ /dev/null @@ -1,17281 +0,0 @@ -+++++ -conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf1 2.75602585333 0 85.603499975 -0.14349997500000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf2 1.97124355876 0 84.40650025 0.5302496250000175 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf3 2.42513277215 0 84.38499995 0.5625000750000169 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf4 3.15731364232 0 84.32650055 0.650249174999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf5 2.15883788221 0 84.71599955 0.44400044999999866 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf6 2.44524097268 0 84.679500025 0.4804999750000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf7 2.65671333449 0 84.833500925 0.3264990750000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf8 2.75602585333 0 85.19850045 0.26149955000000774 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf9 2.40535258985 0 84.749999375 0.41000062500000356 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf10 3.61386264477 0 84.3579998 0.603000300000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf11 2.42513277215 0 84.584999875 0.5750001250000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf12 2.37819629574 0 84.6939999 0.4660001000000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf13 2.47811701105 0 84.370999875 0.5835001875000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf14 2.32202027362 0 84.536500575 0.623499425 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf15 3.44499877858 0 84.21249925 0.8212511250000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf16 2.71542297875 0 84.429999925 0.4950001125000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf17 2.93135224398 0 85.350500475 0.1094995250000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf18 3.38956467165 0 84.52750015 0.6324998500000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf19 3.51061454803 0 84.459999475 0.4500007875000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf20 2.52912274255 0 84.24849965 0.7672505250000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf21 3.25488617683 0 84.588000525 0.5719994750000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf22 2.23579750603 0 84.972999475 0.18700052500001052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf23 2.61896392029 0 84.6199998 0.5400002000000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf24 2.75602585333 0 84.448499375 0.4672509375000118 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf25 2.19091471805 0 85.341498375 0.11850162500000466 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf26 2.60048866322 0 85.50600055 -0.04600054999999087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf27 3.55290898553 0 84.27399915 0.7290012750000159 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 
promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf28 2.60048866322 0 85.5190003 -0.05900029999999673 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf29 2.12156410089 0 85.249500075 0.21049992500000486 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf30 1.63288729942 0 85.540001 -0.0800009999999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf31 2.64243046275 0 84.747 0.41300000000000525 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf32 2.96673931882 0 84.3670011 0.5894983500000137 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf33 3.15731364232 0 84.5144993 0.645500700000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf34 2.93135224398 0 84.598999075 0.5610009250000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf35 2.12156410089 0 85.24549965 0.21450035000000583 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf36 2.52912274255 0 84.636998925 0.5230010750000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf37 2.76636700953 0 84.511999775 0.6480002249999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise 
swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf38 3.48571779877 0 84.44449935 0.47325097500000624 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf39 3.02145143763 0 84.706499675 0.4535003250000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf40 3.26208692053 0 84.479999525 0.6800004750000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf41 2.73050807996 0 85.514499575 -0.05449957499999697 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf42 3.17089287752 0 84.660999875 0.49900012500000346 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf43 2.90250153505 0 85.458999675 0.0010003250000039654 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf44 3.36634957954 0 84.552500325 0.6074996750000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf45 2.79256263316 0 84.668500675 0.49149932500000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf46 2.42912791142 0 84.414500625 0.5182490625000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf47 2.56430926229 0 84.79049965 0.36950035000000414 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf48 1.72558649193 0 84.899994 0.2600059999999985 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf49 3.35104871505 0 84.373500225 0.5797496625000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf50 2.03940354341 0 85.71549965 -0.25549964999999303 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf51 3.08174415116 0 84.29850005 0.6922499250000058 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf52 4.35642257412 0 84.669500075 0.49049992500000317 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf53 2.93433101084 0 85.609000775 -0.14900077499999326 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf54 3.48571779877 0 84.803999275 0.3560007250000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf55 2.70079305483 0 83.907500325 1.2787495125000063 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf56 3.42944293235 0 84.067999375 1.0380009375000085 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf57 2.75369879845 0 85.771000375 -0.3110003749999947 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf58 4.68889675944 0 83.9459997 1.2210004500000053 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise 
swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf59 5.29506075557 0 84.0784999 1.022250150000012 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf60 4.38821376777 0 83.696001175 1.5959982374999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf61 2.93433101084 0 85.736000075 -0.27600007500000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf62 4.14169933128 0 83.947499675 1.218750487500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf63 4.32387438839 0 83.771999775 1.4820003375000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf64 3.61386264477 0 84.3340002 0.6389996999999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf65 3.42944293235 0 85.239499575 0.22050042500000872 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf66 4.40131209873 0 83.8969994 1.2945009000000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf67 3.97425458366 0 83.9985008 1.1422488000000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf68 2.2528774348 0 84.350000025 0.6149999625000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf69 1.81032878247 0 83.699997 1.5900045000000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf70 4.06183643479 0 83.567500275 1.788749587500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf71 4.749306487 0 84.390500075 0.5542498874999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf72 2.28075482883 0 84.5375 0.6225000000000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf73 4.17689583184 0 83.91850015 1.262249775000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf74 4.55072975326 0 83.6869994 1.6095009000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf75 2.16999135568 0 85.843501325 -0.3835013250000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf76 3.88013266186 0 83.785500925 1.461748612500017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf77 4.60761017185 0 83.649500225 1.665749662500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf78 3.51899268644 0 85.6135004 -0.15350040000000148 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf79 4.94025067672 0 83.966500175 1.190249737500018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ 
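The tuner_promise_confs_batch220_multi.txt blocks mix pure-GPU rows with `promise swing_level N` rows, and judging by the file names this multi file is the raw search output from which a pareto file like the one above is distilled. As a hedged illustration of that distillation (a standard non-domination filter, not HPVM's actual pruning code), a configuration survives only if no other configuration is at least as fast and loses at most as much accuracy:

```python
# Hypothetical Pareto-front filter over (speedup, accuracy_loss) pairs,
# illustrating how a tuner_pareto_confs_* file could be derived from a
# tuner_promise_confs_*_multi.txt file. Not HPVM's actual pruning code.
def pareto_front(confs):
    """confs: list of (name, speedup, loss). Keep the non-dominated ones."""
    def dominated(a, b):
        # b dominates a: b is at least as fast and loses at most as much
        # accuracy, and is strictly better on at least one of the two axes.
        return (b[1] >= a[1] and b[2] <= a[2]) and (b[1] > a[1] or b[2] < a[2])
    return [a for a in confs if not any(dominated(a, b) for b in confs)]

# From the blocks above: conf59 (5.295x, 1.022 loss) dominates
# conf58 (4.689x, 1.221 loss), so conf58 would be pruned.
print(pareto_front([("conf58", 4.68889675944, 1.2210004500000053),
                    ("conf59", 5.29506075557, 1.022250150000012)]))
# -> [('conf59', 5.29506075557, 1.022250150000012)]
```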
-+++++ -conf80 2.95809675652 0 85.6330001 -0.1730000999999987 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf81 4.00665156404 0 83.8790009 1.3214986500000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf82 3.38956467165 0 85.33500065 0.12499935000000734 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf83 4.33659088676 0 83.988001225 1.1579981625000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf84 3.26931959511 0 84.842999475 0.317000525000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf85 2.30571969084 0 83.461999975 1.9470000375000112 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf86 2.7508842139 0 84.106000125 0.9809998125000163 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf87 3.370196653 0 83.968499525 1.1872507125000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf88 4.94025067672 0 84.25399945 0.7590008250000153 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf89 3.94816064972 0 84.3739996 0.5790006000000005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf90 3.52359981675 0 85.302000575 0.15799942500001124 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf91 3.05595874161 0 85.42199975 0.03800025000000745 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf92 2.15883788221 0 84.2609997 0.7485004500000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf93 4.48904909382 0 83.8949996 1.2975005999999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf94 3.05595874161 0 85.22250005 0.2374999500000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf95 3.22328457615 0 85.391500875 0.0684991249999996 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf96 3.65869733242 0 83.628499675 1.697250487500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf97 3.13050112438 0 84.350000025 0.6149999625000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf98 4.42774475186 0 83.944500275 1.223249587500014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf99 2.29874094265 0 84.200499275 0.8392510875000099 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf100 4.34358967411 0 84.47900025 0.6809997500000066 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf101 2.98510874485 0 84.6229994 0.5370006000000075 -1 gpu conv perf 23 add fp16 1 tanh 
fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf102 3.11726491618 0 85.597999975 -0.13799997499998823 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf103 3.09443798634 0 85.223499675 0.23650032500000578 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf104 2.33141358263 0 84.844499025 0.3155009750000005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf105 2.50340530846 0 84.1550001 0.9074998500000149 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf106 5.24794714078 0 84.0970001 0.994499850000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf107 3.19188638661 0 83.9204998 1.259250300000005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf108 4.78711530883 0 83.746501 1.5202485000000152 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf109 3.38956467165 0 83.95250035 1.2112494750000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf110 2.49720367723 0 83.921998775 1.257001837499999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf111 4.1533654053 0 83.1165007 2.465248950000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 
7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf112 3.0122585054 0 84.66600095 0.4939990500000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf113 3.57011348802 0 83.949000875 1.2164986875000139 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf114 4.00665156404 0 84.172499475 0.8812507875000151 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf115 3.08465896376 0 82.97749955 2.673750675000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf116 4.17689583184 0 83.724500075 1.5532498875000158 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf117 4.36224960626 0 82.850499925 2.8642501125000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf118 4.03958106713 0 84.96750085 0.19249915000001183 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf119 2.28075482883 0 82.8405001 2.879249850000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf120 4.3751931669 0 83.410500125 2.0242498125000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf121 4.53672840544 0 83.13549935 2.4367509750000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf122 3.10094560281 0 83.877000975 1.3244985375000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 
1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf123 2.81925910919 0 83.148499875 2.4172501875000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf124 4.22476570574 0 83.5195001 1.8607498500000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf125 3.84974163103 0 84.2115002 0.8227497000000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf126 3.43305296381 0 83.175499875 2.376750187500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf127 3.04334411031 0 83.611999925 1.7220001124999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf128 3.93186488986 0 83.7040003 1.5839995500000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf129 4.85009935161 0 83.5490004 1.816499400000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf130 4.3751931669 0 83.993998825 1.1490017624999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf131 4.14752416478 0 85.14899995 0.01100005000000126 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf132 4.38821376777 0 83.797500275 1.443749587500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf133 3.27658441341 0 84.544500125 0.6154998750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul 
fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf134 4.03958106713 0 85.25950035 0.20049965000000897 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf135 3.59623494226 0 83.939500225 1.2307496625000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf136 4.44108049557 0 83.8625007 1.3462489500000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf137 2.56430926229 0 84.193999325 0.8490010125000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf138 3.75178902153 0 83.339999975 2.130000037500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf139 3.70465846079 0 84.716999475 0.44300052500001075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf140 2.70545850471 0 83.519999375 1.8600009375000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf141 3.17089287752 0 83.236999625 2.284500562500014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf142 3.61386264477 0 85.143000275 0.016999724999999466 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf143 4.71064890814 0 83.529500325 1.8457495125000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf144 4.47477396827 0 83.492000225 1.9019996625000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise 
swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf145 4.53672840544 0 84.051999675 1.0620004875000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf146 3.31339824504 0 84.7090006 0.45099940000000915 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf147 2.82736787184 0 84.600000125 0.5599998750000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf148 2.74581611742 0 83.193 2.350500000000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf149 2.87421319527 0 83.2785002 2.222249700000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf150 4.42774475186 0 83.477500925 1.923748612500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf151 2.30388008076 0 84.286499575 0.7102506375000175 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf152 3.59185485404 0 84.2370006 0.784499100000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf153 2.38015655098 0 84.838500775 0.32149922500000516 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf154 4.91476285223 0 83.475999725 1.9260004125000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf155 4.21269566507 0 83.087499475 
2.5087507875000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf156 2.22395915051 0 83.238499725 2.2822504125000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf157 4.75623276948 0 83.1924999 2.3512501500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf158 4.8341984727 0 83.758499325 1.502251012500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf159 3.70465846079 0 83.383499375 2.0647509375000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf160 3.15731364232 0 84.30150075 0.687748875000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf161 2.3293565891 0 84.392 0.5520000000000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf162 2.2528774348 0 84.997000175 0.16299982500000854 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf163 2.71542297875 0 85.508499875 -0.04849987499999314 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf164 3.15731364232 0 84.4710007 0.6889993000000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf165 2.90250153505 0 85.274500125 0.18549987500000215 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf166 2.93135224398 0 85.342000525 0.11799947500000202 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf167 2.52912274255 0 84.337999375 0.6330009375000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf168 2.14786504266 0 84.48350045 0.6764995500000112 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf169 2.21727076111 0 84.595499825 0.5645001750000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf170 2.2528774348 0 85.041 0.11900000000000832 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf171 3.25488617683 0 84.558999125 0.6010008750000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf172 2.40535258985 0 84.651500075 0.5084999250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf173 2.37819629574 0 84.377000025 0.5744999625000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf174 3.32835645011 0 84.435499525 0.4867507125000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf175 2.35917135957 0 84.716500425 0.4434995749999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf176 2.50761735877 0 84.535999925 0.6240000750000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf177 2.42513277215 0 84.369499725 0.5857504125000119 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf178 3.61386264477 0 84.274499475 0.7282507875000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf179 2.25801563131 0 84.589000125 0.5709998750000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf180 2.35917135957 0 84.480499875 0.6795001249999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf181 2.81925910919 0 85.573499275 -0.11349927499999807 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf182 2.52267840919 0 84.341001025 0.6284984625000121 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf183 3.17089287752 0 84.6909999 0.4690001000000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf184 2.75602585333 0 84.700500525 0.45949947500000976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf185 2.57325940549 0 84.8150002 0.3449998000000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf186 2.90250153505 0 84.423999575 0.5040006375000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf187 2.66632133899 0 85.6425003 -0.18250029999998957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf188 2.09298780883 0 84.61899955 0.5410004500000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf189 1.8711830795 0 84.5810003 0.5789997000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf190 2.21727076111 0 84.653999475 0.5060005249999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf191 2.40535258985 0 84.786999975 0.3730000250000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf192 2.93135224398 0 85.291999425 0.16800057500000493 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf193 1.95815465425 0 85.651500225 -0.19150022500000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf194 3.36634957954 0 84.494499625 0.665500375000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf195 3.40522010623 0 84.375498825 0.5767517625000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf196 2.66632133899 0 84.538500425 0.62149957500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf197 2.50128201583 0 84.1665 0.8902500000000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf198 2.27895482379 0 84.610999625 0.549000375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf199 2.81925910919 0 84.495000675 0.6649993250000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf200 3.29857388792 0 84.6065009 0.5534991000000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf201 2.61896392029 0 84.7069994 0.4530006000000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf202 3.04014002515 0 84.4635001 0.6964999000000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf203 3.06572586948 0 84.7455009 0.414499100000009 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf204 3.93186488986 0 84.356500625 0.6052490625000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf205 4.94774553187 0 83.768500225 1.4872496625000124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf206 3.84520265677 0 
84.290999425 0.7035008625000145 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf207 2.61205424073 0 84.867499325 0.292500675000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf208 2.66414324327 0 86.032000175 -0.5720001749999881 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf209 3.69074932962 0 84.219500725 0.8107489125000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf210 4.06237215081 0 85.0309998 0.12900020000000156 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf211 3.31339824504 0 83.93949965 1.2307505250000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf212 3.26931959511 0 85.4195011 0.04049890000000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf213 2.27916093093 0 83.951499525 1.2127507125000037 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf214 3.03418372506 0 84.44249975 0.47625037500001355 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf215 3.35149437292 0 84.049999625 1.0650005625000105 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf216 2.97300733121 0 83.60550005 1.7317499250000026 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 
pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf217 3.52741090964 0 85.505999525 -0.04599952499998777 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf218 3.63209225801 0 83.57750015 1.7737497749999989 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf219 2.32202027362 0 85.46449985 -0.0044998499999905905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf220 4.20126736815 0 84.3879992 0.5580012000000139 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf221 2.32202027362 0 85.6529999 -0.1929998999999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf222 3.51519975253 0 83.630000325 1.6949995125000115 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf223 4.05678387799 0 85.077499975 0.08250002500001019 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf224 3.60986255985 0 85.31750015 0.14249985000000437 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf225 3.03730870211 0 83.63900065 1.681499025000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf226 5.15619113861 0 84.271500925 0.7327486125000107 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf227 
2.73835708776 0 85.30150055 0.15849945000000504 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf228 4.29300531776 0 84.03700005 1.084499925000003 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf229 4.81915540131 0 84.11099975 0.9735003750000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf230 2.89679940787 0 83.536999925 1.8345001125000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf231 4.20126736815 0 84.871999175 0.288000825000006 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf232 4.29300531776 0 84.8200001 0.3399999000000037 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf233 5.06758855666 0 84.5160001 0.6439999000000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf234 2.81925910919 0 83.753498675 1.5097519875000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf235 4.46799443574 0 84.1489996 0.9165006000000133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf236 4.29300531776 0 84.8124996 0.3475004000000098 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf237 2.8439887133 0 83.401999875 2.0370001875000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf238 
3.1208796497 0 85.50700015 -0.04700014999999097 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf239 4.81915540131 0 84.649000525 0.5109994749999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf240 4.63728501684 0 84.728500125 0.43149987500000864 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf241 4.71136945196 0 84.487998925 0.6720010750000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf242 4.38883904114 0 84.81199985 0.3480001499999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf243 2.61686247873 0 83.9070001 1.2794998500000005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf244 2.76636700953 0 84.293000275 0.700499587500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf245 2.02960847562 0 84.04700125 1.0694981250000168 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf246 3.37064741875 0 84.3115009 0.6727486500000097 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf247 3.38956467165 0 85.288499275 0.17150072499999852 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf248 4.35642257412 0 84.583000325 0.5769996750000047 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf249 3.3138339442 0 85.486001025 -0.026001024999987965 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf250 3.27701048398 0 83.59450055 1.748249174999998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf251 3.25488617683 0 84.555500025 0.6044999750000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf252 4.63728501684 0 84.09699955 0.9945006749999976 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf253 5.07547506828 0 83.76399915 1.4940012750000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf254 2.32388897879 0 84.887499975 0.2725000250000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf255 2.89145135544 0 85.8500006 -0.3900005999999962 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf256 4.94025067672 0 84.158499675 0.9022504875000052 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf257 2.76636700953 0 84.41199975 0.5220003749999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf258 3.31003535109 0 85.418500075 0.04149992500000793 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf259 3.09443798634 0 85.576000175 -0.1160001749999992 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf260 2.22923500612 0 84.67999975 0.4800002500000119 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf261 3.35149437292 0 84.19099985 0.8535002250000119 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf262 5.02441943023 0 84.25649955 0.7552506750000063 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf263 3.31339824504 0 85.33299995 0.12700005000000375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf264 3.69537408729 0 85.2349998 0.22500020000000803 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf265 3.88013266186 0 84.294999275 0.6975010875000152 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf266 4.1533654053 0 82.951 2.7135000000000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf267 2.89679940787 0 82.9884999 2.657250150000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf268 4.39475317355 0 83.83999995 1.380000074999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf269 2.74576172323 0 83.461999475 1.9470007875000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf270 5.40080120652 0 82.863499425 2.844750862500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf271 2.89111964106 0 85.57900005 -0.11900004999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf272 3.75178902153 0 84.894499425 0.2655005749999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf273 4.71064890814 0 83.8385005 1.3822492500000152 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf274 4.39475317355 0 83.992499725 1.151250412500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf275 4.29866382416 0 83.3029991 2.185501350000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf276 3.64063062067 0 84.453499975 0.4597500375000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf277 5.28466361659 0 82.879000675 2.821498987500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf278 4.68074200425 0 83.704000075 1.5839998875000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf279 4.09568311293 0 83.305500125 2.1817498125000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf280 4.08433838508 0 83.359500075 2.100749887500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf281 3.32835645011 0 84.2059993 0.8310010500000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf282 2.9489396246 0 82.913999825 2.7690002625000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf283 4.40131209873 0 83.8754995 1.3267507500000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf284 4.10709103894 0 83.194000425 2.3489993625000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf285 3.8699491435 0 83.5369993 1.83450105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf286 3.70010922218 0 83.02999955 2.5950006750000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf287 2.91115808335 0 82.77200055 2.9819991750000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf288 3.99579404276 0 83.80400105 1.4339984250000128 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf289 4.48157410599 0 82.996000025 2.6459999625000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf290 4.29866382416 0 83.480500575 1.9192491375000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf291 3.33219723454 0 83.320000375 2.159999437499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf292 3.9635717019 0 85.159999775 2.25000002296305e-07 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
-3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf293 2.19094934992 0 84.789500625 0.37049937500000285 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf294 5.1195298246 0 82.923000125 2.7554998125000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf295 4.39475317355 0 83.503499775 1.8847503375000159 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf296 3.48984270518 0 84.316501225 0.6652481625000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf297 5.06675494506 0 82.949500225 2.7157496625000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf298 2.73050807996 0 83.426499675 2.0002504875000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf299 4.46799443574 0 83.66850015 1.637249775000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf300 4.60761017185 0 82.818499225 2.912251162500013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf301 4.32387438839 0 82.778499525 2.9722507125000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf302 4.08433838508 0 85.01950015 0.1404998500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf303 4.06183643479 0 82.95150055 2.712749174999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise 
swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf304 4.53672840544 0 83.8220002 1.4069997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf305 2.84647094048 0 85.43150025 0.028499750000005986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf306 5.04077343699 0 82.977500125 2.6737498124999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf307 3.9635717019 0 82.8584995 2.8522507500000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf308 3.61839106317 0 82.968501275 2.6872480875000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf309 4.22476570574 0 83.063999525 2.544000712500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf310 4.29866382416 0 83.97550025 1.1767496250000136 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf311 2.88270158233 0 84.071500325 1.0327495125000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf312 3.91100734284 0 83.092499525 2.501250712500017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf313 4.3751931669 0 83.9080002 1.2779997000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf314 4.3751931669 0 82.922000325 2.756999512500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf315 2.84098663088 0 82.856499725 
2.8552504124999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf316 2.2528774348 0 84.9994999 0.1605001000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf317 3.17089287752 0 84.63199955 0.5280004500000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf318 2.60283267947 0 84.2664995 0.7402507500000155 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf319 2.32202027362 0 84.53199975 0.628000250000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf320 3.13716146983 0 84.20149995 0.8377500750000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf321 2.520476477 0 84.890501025 0.2694989749999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf322 3.36634957954 0 84.530499175 0.6295008250000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf323 2.75602585333 0 85.24450055 0.2154994500000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf324 2.73050807996 0 85.327499775 0.1325002249999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf325 2.57325940549 0 85.38850005 0.07149995000000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 
add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf326 2.23579750603 0 84.981499925 0.17850007500001086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf327 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf328 3.35104871505 0 84.346000075 0.6209998874999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf329 3.15731364232 0 84.38100045 0.5684993250000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf330 2.87421319527 0 85.384499975 0.07550002500000802 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf331 2.73050807996 0 84.6715004 0.4884996000000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf332 3.24057963994 0 84.39200025 0.5519996250000148 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf333 3.25488617683 0 84.634000375 0.5259996250000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf334 2.73050807996 0 85.609499625 -0.14949962499999286 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf335 2.09742183942 0 85.229000225 0.23099977500001218 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf336 3.0527280537 0 84.765500125 0.3944998750000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf337 2.12156410089 0 85.1930007 0.2669993000000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf338 2.90250153505 0 85.44900055 0.01099945000001129 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf339 2.37819629574 0 84.46199975 0.6980002500000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf340 2.22395915051 0 84.951000175 0.2089998250000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf341 3.04014002515 0 84.3760006 0.5759991000000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf342 2.44524097268 0 85.568500075 -0.10850007499999775 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf343 2.27895482379 0 84.6024996 0.5575004000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf344 1.95815465425 0 85.534000025 -0.07400002499999231 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf345 2.90250153505 0 85.49499935 -0.03499934999999538 
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf346 3.17089287752 0 84.686 0.4739999999999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf347 2.81925910919 0 85.59749985 -0.13749985000000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf348 3.29857388792 0 84.630999275 0.5290007250000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf349 3.26208692053 0 84.315000225 0.6674996624999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf350 2.03940354341 0 85.43599915 0.02400085000000446 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf351 2.31110203954 0 84.442999825 0.475500262500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf352 3.46931522498 0 84.3325001 0.6412498500000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf353 2.37819629574 0 84.4335003 0.4897495499999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf354 2.57325940549 0 85.4859998 -0.025999799999996742 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf355 3.38956467165 0 84.546000025 0.6139999750000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf356 3.51061454803 0 84.455499375 0.4567509375000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf357 2.79256263316 0 84.673500175 0.4864998250000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf358 2.81925910919 0 84.470000625 0.6899993750000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf359 2.60048866322 0 85.495500325 -0.03550032499998962 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf360 4.94025067672 0 84.404999875 0.5325001875000055 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf361 4.67403368929 0 84.088000125 1.0079998125000174 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf362 3.47749717017 0 85.815501 -0.35550099999999246 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf363 4.85885544059 0 84.292499175 0.7012512375000028 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf364 4.68074200425 0 83.977499175 1.1737512374999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf365 4.78711530883 0 83.862999725 1.3455004125000158 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf366 4.0017352815 0 85.11050035 0.04949965000000989 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise 
swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf367 4.36871179935 0 84.263999575 0.7440006375000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf368 3.13382775829 0 83.49650025 1.8952496250000124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf369 3.69119162626 0 84.524999225 0.6350007750000032 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf370 2.73080396045 0 84.9065 0.25350000000001105 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf371 4.57189457086 0 84.15749935 0.9037509750000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf372 3.59185485404 0 85.0480001 0.11199990000000926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf373 4.67403368929 0 84.4090002 0.5264997000000164 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf374 2.97571998859 0 83.81799985 1.4130002249999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf375 3.35149437292 0 83.5740013 1.7789980499999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf376 5.02441943023 0 84.47450005 0.685499950000002 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf377 4.78711530883 0 84.00499975 1.1325003750000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 
promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf378 3.35149437292 0 84.349999975 0.6150000375000033 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf379 3.10094560281 0 86.061499025 -0.6014990249999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf380 4.48904909382 0 84.381999625 0.5670005624999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf381 2.80851950068 0 83.815500875 1.4167486875000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf382 4.29300531776 0 84.627500175 0.5324998250000107 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf383 3.54863373783 0 85.767000175 -0.3070001750000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf384 3.13755204997 0 83.70100015 1.5884997750000096 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf385 2.54900634755 0 84.051000425 1.0634993625000106 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf386 3.28789331155 0 85.129000325 0.030999674999998228 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf387 5.11150591832 0 84.26499995 0.7425000750000024 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 
------ -+++++ -conf388 1.87599501422 0 84.58149945 0.5785005500000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf389 4.33659088676 0 83.977500975 1.1737485375000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf390 4.8661051796 0 83.88949965 1.3057505250000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf391 3.38604547437 0 85.566500875 -0.10650087499999755 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf392 2.96078226017 0 85.70449975 -0.2444997499999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf393 5.03217204616 0 83.6715 1.6327500000000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf394 3.15056754597 0 84.98200025 0.17799975000000645 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf395 4.00665156404 0 83.9340005 1.2389992500000133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf396 3.39002063361 0 85.293999675 0.16600032500001022 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf397 3.21935410129 0 84.070499775 1.034250337500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf398 4.85885544059 0 84.65649925 0.5035007500000092 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 
1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf399 4.70385465798 0 84.5609999 0.5990001000000064 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf400 3.44499877858 0 85.4609991 -0.000999099999989983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf401 4.55072975326 0 84.080499875 1.0192501875000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf402 2.81925910919 0 83.83000085 1.3949987250000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf403 4.33022330151 0 84.1734993 0.879751050000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf404 2.65699343733 0 83.763500225 1.4947496625000056 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf405 5.20166453319 0 84.033999425 1.0890008625000007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf406 3.25164094825 0 83.796001025 1.4459984625000146 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf407 4.48904909382 0 84.65949995 0.500500050000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf408 4.23690511031 0 84.1950004 0.8474994000000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf409 2.21401235836 0 85.0235005 0.13649950000000788 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise 
swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf410 3.33596660148 0 85.36200025 0.09799975000001099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf411 3.15056754597 0 83.512500525 1.8712492125000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf412 4.0900028821 0 83.86599955 1.3410006750000107 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf413 4.74857429339 0 84.053999725 1.059000412500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf414 3.25488617683 0 83.933500225 1.239749662500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf415 2.80851950068 0 83.87650005 1.3252499250000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf416 3.70465846079 0 84.4189996 0.511500599999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf417 4.67403368929 0 84.3789996 0.5715006000000074 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf418 3.05595874161 0 84.039000725 1.0814989125000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf419 3.59185485404 0 85.8430001 -0.38300009999999246 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf420 4.06183643479 0 84.1495005 0.9157492500000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 
promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf421 2.69556689425 0 84.286999325 0.709501012500013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf422 3.3549420805 0 85.782500025 -0.32250002499999936 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf423 2.45955992282 0 84.842500325 0.317499675000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf424 4.49523657385 0 83.92400015 1.2539997750000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf425 4.48904909382 0 83.901000575 1.2884991375000112 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf426 3.39354810299 0 85.559999275 -0.09999927499999045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf427 4.94025067672 0 84.243499175 0.7747512375000127 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf428 4.56481779115 0 84.1150009 0.9674986500000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf429 4.17689583184 0 84.253999075 0.7590013875000139 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf430 3.61386264477 0 85.844500175 -0.38450017499998806 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf431 3.08174415116 0 85.580500375 -0.12050037499999461 -1 gpu conv perf 23 add 
fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf432 4.99805803481 0 83.592999425 1.7505008625000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf433 4.03958106713 0 84.00849995 1.1272500750000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf434 3.02145143763 0 83.268000675 2.2379989875000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf435 4.49523657385 0 83.028999975 2.5965000375000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf436 4.27374553867 0 83.9910002 1.1534997000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf437 4.36224960626 0 83.847999525 1.3680007124999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf438 2.71542297875 0 83.52849905 1.8472514250000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf439 2.47811701105 0 84.5134996 0.6465004000000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf440 4.23690511031 0 83.79150005 1.4527499250000133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf441 3.29857388792 0 85.05850045 0.10149955000000832 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf442 5.20997422309 0 83.15000035 
2.414999475000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf443 5.50155459048 0 83.250999075 2.263501387500014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf444 3.18458942266 0 84.93299905 0.22700094999999865 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf445 4.68074200425 0 83.61949965 1.710750525000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf446 3.84974163103 0 84.69049935 0.46950065000000907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf447 4.63658695371 0 83.7550003 1.5074995499999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf448 4.68074200425 0 83.94699955 1.2195006750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf449 2.9489396246 0 83.6290004 1.6964994000000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf450 5.1643560615 0 83.562499475 1.7962507875000142 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf451 3.93186488986 0 83.49099985 1.9035002250000161 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf452 3.55728584143 0 82.924999 2.752501500000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf453 4.24300101117 0 83.38699945 2.0595008250000006 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf454 4.95606048827 0 83.69699935 1.5945009750000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf455 4.75623276948 0 83.516500825 1.8652487625000163 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf456 2.7586587916 0 83.8450002 1.3724997000000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf457 5.40080120652 0 83.35250035 2.111249475000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf458 3.44499877858 0 84.8129998 0.3470002000000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf459 3.75178902153 0 84.776000275 0.3839997250000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf460 3.55290898553 0 84.6360004 0.5239996000000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf461 5.30367177341 0 82.9259998 2.7510003000000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf462 4.56481779115 0 83.055499625 2.5567505625000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf463 5.20997422309 0 83.420999525 2.008500712500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf464 4.45449681271 0 83.66950075 1.6357488750000115 -1 gpu conv fp16 1 add fp16 
1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf465 2.520476477 0 84.838000625 0.32199937499999864 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf466 3.55290898553 0 85.03699985 0.1230001500000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf467 3.65869733242 0 85.583500025 -0.12350002500000129 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf468 4.45449681271 0 83.6295 1.695750000000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf469 4.71064890814 0 83.543000025 1.825499962500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf470 2.2189746076 0 83.33249935 2.1412509749999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf471 2.97872563985 0 83.459499975 1.9507500375000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf472 5.1195298246 0 83.2679997 2.2380004500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf473 4.91476285223 0 83.098000475 2.492999287499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf474 4.68074200425 0 83.8485003 1.367249550000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf475 4.27374553867 0 83.976999975 1.1745000375000103 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf476 3.57011348802 0 83.251999825 2.2620002625000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf477 4.27374553867 0 83.580000125 1.7699998125000107 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf478 4.01756825102 0 84.247500275 0.7687495875000181 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf479 4.8661051796 0 83.483000725 1.9154989125000128 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf480 2.3293565891 0 84.3574996 0.6037506000000121 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf481 3.73279362334 0 84.351000625 0.6134990625000114 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf482 5.20997422309 0 83.508000175 1.877999737499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf483 4.71064890814 0 83.18400015 2.3639997750000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf484 2.97872563985 0 84.0844998 1.0132503000000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf485 4.99805803481 0 83.6575002 1.6537497000000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf486 4.68074200425 0 83.4239998 2.0040003000000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf487 4.68074200425 0 83.921 1.258499999999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf488 5.60613859814 0 83.26149955 2.247750675000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf489 3.65869733242 0 84.5534994 0.6065005999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf490 3.61386264477 0 84.3454998 0.6217503000000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf491 2.54220394201 0 84.2759998 0.7260003000000168 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf492 2.69064816356 0 84.613499375 0.5465006250000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf493 2.50761735877 0 84.60450015 0.5554998499999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf494 2.57325940549 0 84.82849985 0.3315001500000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf495 2.75602585333 0 85.552499775 -0.09249977499999601 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf496 2.90250153505 0 85.553999725 -0.09399972499999193 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 
add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf497 2.93135224398 0 85.3530005 0.10699950000001196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf498 3.25488617683 0 84.6365001 0.5234998999999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf499 3.17089287752 0 84.72150055 0.43849945000000334 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf500 2.71542297875 0 84.4644997 0.6955003000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf501 2.39753056999 0 84.811998925 0.34800107500000765 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf502 3.51061454803 0 84.41499995 0.5175000750000152 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf503 2.15883788221 0 85.138499675 0.021500324999999543 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf504 3.61386264477 0 84.338499175 0.6322512375000144 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf505 2.3293565891 0 84.847500825 0.3124991750000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf506 3.0527280537 0 84.755499425 0.4045005750000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf507 2.25112114639 0 85.3159997 0.14400029999999903 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf508 2.59591050603 0 84.5725002 0.5874998000000119 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf509 2.21727076111 0 84.6444996 0.515500400000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf510 3.51061454803 0 84.459499925 0.4507501125000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf511 2.37819629574 0 84.42350025 0.5047496250000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf512 3.11726491618 0 84.47399915 0.6860008500000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf513 2.93135224398 0 84.58050005 0.5794999500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf514 3.36634957954 0 84.510999775 0.6490002250000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf515 2.48647461628 0 84.774499375 0.38550062500000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf516 2.23579750603 0 84.407000725 0.5294989125000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf517 2.71542297875 0 85.471001025 -0.011001025000001607 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf518 2.40535258985 0 84.3829998 0.5655003000000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf519 2.90250153505 0 85.4464989 0.013501100000010535 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf520 3.21935410129 0 84.5264997 0.6335003000000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf521 2.52912274255 0 84.368500525 0.5872492125000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf522 2.73050807996 0 84.839500775 0.3204992250000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf523 2.29313137734 0 85.28849955 0.17150045000000774 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf524 2.66632133899 0 84.5649994 0.5950006000000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf525 3.25488617683 0 84.5769993 0.583000700000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf526 4.11336811599 0 84.4165001 0.5152498500000178 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf527 3.93186488986 0 84.1989996 0.8415006000000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf528 3.26974377829 0 84.248499875 0.7672501875000179 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf529 3.35522632718 0 85.427999675 0.03200032500000988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf530 3.51519975253 0 85.325000175 0.13499982500000557 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf531 2.54659446441 0 85.07150035 0.08849965000001137 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf532 3.40166834257 0 84.99499965 0.16500035000000823 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf533 4.42774475186 0 84.049499125 1.0657513125000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf534 3.23736286274 0 83.677999875 1.6230001875000113 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf535 3.08503657926 0 84.559999775 0.6000002250000108 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf536 2.43921651142 0 84.4750008 0.6849992000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf537 2.63539656996 0 83.5919994 1.7520008999999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 
------ -+++++ -conf538 4.68074200425 0 83.95500035 1.2074994749999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf539 3.26974377829 0 85.361000825 0.09899917500000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf540 3.21617930472 0 84.7574993 0.4025006999999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf541 3.15770925683 0 84.638499425 0.5215005749999989 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf542 3.55760541076 0 85.297001425 0.1629985750000003 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf543 2.45955992282 0 85.02450045 0.13549955000000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf544 4.45514111937 0 84.635499725 0.5245002750000026 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf545 3.65869733242 0 85.610500525 -0.15050052500000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf546 1.98330178402 0 83.787500225 1.4587496625000043 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf547 3.47749717017 0 85.6019997 -0.14199969999998813 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf548 3.67738324523 0 85.27900045 0.18099955000000706 -1 gpu conv perf 23 add 
fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf549 3.70045496902 0 85.3585005 0.10149949999999991 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf550 3.28789331155 0 85.61950085 -0.1595008499999892 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf551 4.51654937482 0 84.5564993 0.6035007000000064 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf552 5.29506075557 0 84.062999625 1.0455005625000027 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf553 4.68889675944 0 84.380499025 0.5692514624999987 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf554 2.86615984136 0 83.78750105 1.4587484250000031 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf555 2.65699343733 0 84.853500125 0.30649987500000864 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf556 2.38015655098 0 84.0074999 1.128750150000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf557 3.26974377829 0 85.22600125 0.2339987500000092 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf558 3.28789331155 0 85.26200045 0.19799955000000297 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 
1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf559 2.97001320307 0 85.713000675 -0.2530006749999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf560 3.266045536 0 83.7365 1.5352499999999978 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf561 2.32402535693 0 85.8155004 -0.35550039999999966 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf562 2.97307110165 0 84.287499875 0.7087501875000157 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf563 3.32496313987 0 85.240500875 0.21949912500001006 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf564 3.21617930472 0 84.58050025 0.5794997500000051 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf565 2.8168240943 0 83.5575006 1.8037491000000117 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf566 2.80077299026 0 84.7334995 0.4265005000000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf567 4.42174155156 0 84.5384996 0.6215004000000107 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf568 4.749306487 0 84.507498925 0.6525010750000121 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf569 3.10094560281 0 85.609499925 
-0.14949992499998926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf570 2.31844697619 0 85.093500125 0.06649987499999954 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf571 2.20731326885 0 85.1330001 0.026999900000001298 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf572 2.80851950068 0 83.83300075 1.390498875000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf573 3.13089004774 0 84.530499925 0.6295000750000043 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf574 2.94339750184 0 84.789 0.37100000000000366 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf575 2.96673931882 0 84.37349995 0.579750075000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf576 3.05595874161 0 85.299 0.16099999999999853 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf577 2.29874094265 0 84.173999425 0.8790008624999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf578 2.89992066892 0 85.835000775 -0.37500077499999235 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf579 1.88691288205 0 85.365000175 0.09499982499999932 -1 gpu conv perf 23 
add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf580 3.19188638661 0 83.9349999 1.2375001500000167 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf581 3.15770925683 0 83.932000175 1.2419997375000094 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf582 3.65459743556 0 85.2625 0.19750000000000228 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf583 4.48904909382 0 84.575000775 0.5849992249999986 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf584 3.17463312783 0 85.314500525 0.14549947500000543 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf585 4.46188167251 0 84.81450025 0.34549975000001043 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf586 3.30262160909 0 83.46249945 1.9462508250000141 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf587 2.44755104472 0 84.18550015 0.861749775000014 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf588 2.27743827873 0 85.0779999 0.08200010000001046 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf589 5.30367177341 0 83.246999525 2.2695007124999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise 
swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf590 5.30367177341 0 83.489000525 1.906499212500016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf591 5.28466361659 0 83.276498925 2.22525161250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf592 2.93135224398 0 83.3195003 2.1607495500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf593 3.39354810299 0 82.8199997 2.9100004500000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf594 4.49523657385 0 83.3474991 2.118751350000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf595 5.03217204616 0 83.44649945 1.9702508250000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf596 4.00665156404 0 83.358499125 2.102251312500016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf597 2.90250153505 0 85.5110003 -0.051000300000001164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf598 2.80057492256 0 83.1685006 2.3872491000000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf599 5.30367177341 0 83.4164997 2.0152504500000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf600 3.08465896376 0 83.0965006 2.4952491000000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 
promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf601 2.99080920887 0 83.1744995 2.3782507500000136 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf602 4.74094044098 0 83.113999725 2.4690004125000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf603 4.56481779115 0 83.7375002 1.5337497000000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf604 3.25488617683 0 85.93800085 -0.47800084999998943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf605 4.95606048827 0 83.646 1.6710000000000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf606 3.46931522498 0 84.030500025 1.0942499625000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf607 3.47348842326 0 82.962000275 2.6969995875000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf608 5.1195298246 0 83.403000875 2.0354986875000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf609 3.31339824504 0 84.685499025 0.47450097500000654 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf610 5.03217204616 0 83.4554993 1.9567510500000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf611 3.68613613324 0 83.4790005 1.9214992500000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf612 4.91476285223 0 83.479000075 1.9214998875000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf613 3.80995316188 0 83.221500025 2.3077499625000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf614 4.17689583184 0 83.719001025 1.561498462500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf615 4.93119904091 0 83.428499825 1.9972502625000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf616 3.17089287752 0 84.216499775 0.8152503375000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf617 4.53672840544 0 83.583499825 1.7647502625000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf618 4.75623276948 0 83.30349945 2.184750825000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf619 4.94774553187 0 83.322999425 2.1555008624999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf620 5.1195298246 0 83.27599975 2.2260003750000124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf621 5.40080120652 0 83.302000575 2.186999137500017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf622 3.14057344002 0 83.0755001 2.5267498500000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 
pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf623 4.13009860961 0 83.785000275 1.4624995875000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf624 3.23702263508 0 85.207499875 0.2525001250000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf625 3.75178902153 0 84.038999775 1.0815003375000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf626 2.70799566698 0 83.4659988 1.9410018000000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf627 5.40080120652 0 83.39550015 2.046749775000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf628 4.56481779115 0 83.61799965 1.7130005250000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf629 4.06183643479 0 83.447999075 1.96800138750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf630 3.42102082784 0 83.98699975 1.1595003750000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf631 4.75623276948 0 83.5254996 1.8517506000000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf632 2.44524097268 0 83.49900015 1.8914997750000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf633 2.86305165382 0 85.425500675 0.03449932500001013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf634 4.46799443574 0 83.6610006 1.6484991000000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf635 2.65671333449 0 84.918999 0.24100100000000568 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf636 3.32835645011 0 84.2724993 0.7312510499999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf637 2.59591050603 0 83.733500725 1.5397489125000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf638 4.75623276948 0 83.531999775 1.8420003375000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf639 3.38567327954 0 84.47250065 0.6874993500000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf640 2.81925910919 0 83.356000325 2.105999512500013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf641 4.00665156404 0 83.6824997 1.6162504500000168 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf642 4.56481779115 0 83.063499975 2.544750037500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf643 4.49523657385 0 83.25949975 2.2507503750000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf644 4.3751931669 0 
83.59049985 1.754250225000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf645 3.11726491618 0 83.152500325 2.4112495124999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf646 3.93186488986 0 83.749999325 1.5150010125000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf647 4.09568311293 0 83.209001025 2.3264984624999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf648 3.35868172117 0 84.0900003 1.004999550000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf649 2.81925910919 0 84.555999975 0.6040000249999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf650 2.42513277215 0 84.331499675 0.6427504875000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf651 1.90256982287 0 84.902999375 0.257000625000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf652 3.51061454803 0 84.4349997 0.48750044999999886 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf653 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf654 2.48647461628 0 84.258500325 0.7522495125000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf655 2.64243046275 0 85.2780003 0.18199970000000293 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf656 3.25488617683 0 84.5834995 0.5765005000000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf657 2.65671333449 0 85.422499975 0.03750002500001132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf658 2.90250153505 0 85.48449975 -0.02449974999999255 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf659 3.40522010623 0 84.250000125 0.7649998125000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf660 1.70856970404 0 83.979996 1.1700060000000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf661 2.20731326885 0 85.229500375 0.23049962500000448 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf662 2.46568541903 0 84.5034996 0.6565004000000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf663 2.69064816356 0 84.5959997 0.5640003000000121 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf664 2.64243046275 0 85.358000375 0.10199962500000198 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf665 2.08260997416 0 84.921000775 0.23899922500000914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf666 2.00610810282 0 85.678000625 -0.21800062499999057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf667 3.51061454803 0 84.4344999 0.48825014999999894 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf668 2.50761735877 0 84.554999925 0.605000075000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf669 2.50128201583 0 84.257000525 0.754499212500015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf670 2.09742183942 0 84.831500325 0.32849967500001187 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf671 2.09742183942 0 85.82949995 -0.36949994999999375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf672 3.36634957954 0 84.42999935 0.4950009750000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf673 2.90250153505 0 85.48999975 -0.02999974999999039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 
------ -+++++ -conf674 2.09298780883 0 84.546000375 0.6139996249999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf675 2.44524097268 0 85.437500525 0.022499475000000768 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf676 2.64243046275 0 84.79750005 0.3624999500000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf677 2.42513277215 0 84.43400065 0.48899902500000536 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf678 1.77010128766 0 84.7734991 0.38650090000001003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf679 2.61896392029 0 84.401499175 0.5377512375000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf680 2.08260997416 0 85.7424997 -0.2824996999999911 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf681 2.31110203954 0 84.53350035 0.6264996500000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf682 2.31110203954 0 84.32900035 0.6464994750000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf683 2.40535258985 0 84.41899995 0.5115000750000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf684 2.36674469012 0 84.894500225 0.26549977500000355 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf685 1.79160865678 0 85.339996 0.12000400000000583 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf686 2.08260997416 0 85.585000675 -0.12500067499999828 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf687 3.55290898553 0 84.268999875 0.736500187499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf688 2.86305165382 0 84.4700005 0.6899995000000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf689 2.29313137734 0 84.528500025 0.6314999750000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf690 2.37819629574 0 84.4455007 0.4717489500000127 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf691 3.36634957954 0 84.542 0.6180000000000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf692 3.26208692053 0 84.27099965 0.7335005250000179 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf693 2.29313137734 0 84.73400045 0.4259995500000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 
1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf694 3.02145143763 0 84.648499925 0.5115000750000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf695 2.520476477 0 85.5895 -0.1294999999999959 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf696 2.57325940549 0 85.60300025 -0.14300024999998867 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf697 3.1883620737 0 85.56250075 -0.10250074999999298 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf698 3.06230535434 0 85.831000325 -0.37100032499999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf699 3.26931959511 0 84.712999925 0.4470000749999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf700 1.99673018213 0 83.7179994 1.563000900000013 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf701 2.47192966909 0 84.130000425 0.944999362499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf702 3.54904262632 0 84.3324999 0.6412501500000047 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf703 2.41741365769 0 84.94699985 0.21300015000000772 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf704 2.31844697619 0 85.657000325 -0.19700032499999337 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf705 3.04014002515 0 83.450499725 1.9642504125000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf706 4.27374553867 0 83.857000625 1.354499062500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf707 2.96078226017 0 83.80250075 1.4362488750000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf708 3.21935410129 0 83.672501175 1.6312482375000172 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf709 2.01859798873 0 83.3645002 2.0932497000000154 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf710 3.09443798634 0 83.43500025 1.9874996250000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf711 2.63326868545 0 83.457499725 1.9537504124999998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf712 2.94311625424 0 86.09950025 -0.6395002500000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf713 3.06230535434 0 86.115999225 -0.6559992249999909 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax 
fp16 1 ------ -+++++ -conf714 4.24911447842 0 83.793999475 1.4490007874999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf715 3.31003535109 0 83.8900007 1.3049989500000052 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf716 3.62274145966 0 83.74299865 1.5255020250000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf717 2.96078226017 0 84.0415006 1.0777490999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf718 2.39576934005 0 84.632000575 0.5279994249999987 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf719 3.93186488986 0 83.845000625 1.3724990625000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf720 3.84520265677 0 84.046999725 1.0695004124999983 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf721 2.49704621765 0 84.4030001 0.5354998500000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf722 2.65699343733 0 85.916499875 -0.4564998749999944 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf723 2.73080396045 0 85.85699995 -0.39699994999999716 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf724 4.21872205213 0 83.792000375 1.4519994375000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 
-4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf725 2.95787581321 0 86.111499675 -0.6514996749999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf726 2.38015655098 0 84.557498925 0.6025010750000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf727 3.25164094825 0 84.9199999 0.2400001000000117 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf728 2.35356305584 0 85.23000035 0.22999965000000772 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf729 4.06183643479 0 83.7469994 1.519500899999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf730 2.40535258985 0 84.8959992 0.2640007999999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf731 3.1208796497 0 85.665500225 -0.20550022499999726 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf732 3.15770925683 0 85.61300085 -0.1530008500000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf733 3.88013266186 0 83.951999825 1.2120002625000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf734 3.3138339442 0 85.405999775 0.054000225000007174 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul 
fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf735 3.34760900076 0 85.161500625 0.29849937500000295 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf736 4.18338826327 0 84.270000575 0.7349991375000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf737 2.49092077818 0 83.781500625 1.4677490624999976 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf738 3.02145143763 0 85.3139999 0.14600010000000624 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf739 3.09112547159 0 84.4004995 0.539250750000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf740 4.94774553187 0 83.575499 1.7767515000000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf741 3.79036576283 0 84.13149995 0.9427500749999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf742 3.9635717019 0 83.76249945 1.496250824999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf743 3.1208796497 0 83.843999825 1.3740002625000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf744 2.36502838432 0 86.03249995 -0.5724999499999968 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf745 4.46799443574 0 83.862499775 1.3462503375000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 
promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf746 3.09112547159 0 85.35899945 0.10100055000000624 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf747 3.13050112438 0 85.693498925 -0.23349892499999497 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf748 3.93186488986 0 83.7594999 1.500750150000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf749 3.31003535109 0 85.620500375 -0.16050037500000086 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf750 2.34234689252 0 84.826500525 0.333499475000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf751 3.34760900076 0 83.8520003 1.3619995500000073 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf752 4.78009885505 0 83.989999425 1.1550008625000174 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf753 2.2649524773 0 84.44999965 0.4650005250000149 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf754 4.13009860961 0 83.844999475 1.372500787500016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf755 3.17089287752 0 84.80849995 0.35150005000000706 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf756 3.22639831866 0 84.29249985 
0.7012502250000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf757 3.02484725731 0 83.689999175 1.6050012375000122 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf758 4.27374553867 0 83.828500725 1.397248912500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf759 2.23426580428 0 84.93800035 0.22199965000000932 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf760 4.33659088676 0 83.9419998 1.2270003000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf761 2.50340530846 0 83.616499725 1.715250412500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf762 3.31339824504 0 84.70049985 0.4595001500000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf763 3.17437865535 0 83.529500175 1.8457497375000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf764 2.56430926229 0 84.0064993 1.1302510500000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf765 3.30262160909 0 83.82300065 1.4054990250000117 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf766 2.51642672638 0 83.737999875 1.5330001875000079 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 
tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf767 3.09150467208 0 84.5830006 0.5769993999999997 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf768 2.14786504266 0 84.8885 0.27150000000001173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf769 2.8439887133 0 83.5995003 1.7407495500000039 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf770 4.53042625525 0 84.394499275 0.548251087500006 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf771 5.02441943023 0 84.026499275 1.1002510874999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf772 2.83306023509 0 84.5554991 0.6045008999999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf773 3.62274145966 0 84.25650025 0.7552496250000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf774 3.9162009563 0 85.016500125 0.14349987500001193 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf775 3.40522010623 0 84.588500025 0.5714999750000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf776 4.71064890814 0 83.615999425 1.7160008625000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf777 2.87421319527 0 83.48200005 1.9169999250000131 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf778 3.99579404276 0 85.31950015 0.14049985000000903 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf779 4.28616846517 0 83.094000975 2.4989985375000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf780 3.23702263508 0 84.514499575 0.645500425000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf781 4.74857429339 0 83.713999775 1.5690003375000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf782 3.60502724486 0 84.59950105 0.5604989500000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf783 3.08465896376 0 84.54 0.6199999999999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf784 2.59591050603 0 83.83300055 1.3904991750000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf785 4.46123541487 0 84.034000175 1.0889997375000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf786 4.22476570574 0 83.06150035 2.5477494750000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf787 2.56430926229 0 84.01500055 1.1174991750000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf788 
3.02145143763 0 83.44000055 1.9799991750000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf789 3.40522010623 0 84.200000775 0.8399988374999978 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf790 3.90066133282 0 83.974501425 1.1782478625000081 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf791 2.73557376185 0 83.05799955 2.553000675 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf792 2.76636700953 0 85.455001425 0.004998574999999061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf793 3.47348842326 0 82.9944988 2.648251800000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf794 4.14169933128 0 83.0104999 2.624250150000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf795 4.74857429339 0 83.849500325 1.3657495125000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf796 4.91476285223 0 83.179499625 2.3707505624999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf797 4.2552455881 0 84.835999775 0.32400022500000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf798 2.76636700953 0 83.53999915 1.8300012750000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 
4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf799 4.22476570574 0 83.30599935 2.1810009750000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf800 3.6587939114 0 83.05299955 2.5605006750000143 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf801 4.09568311293 0 84.89399985 0.266000150000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf802 4.2552455881 0 85.200001375 0.25999862500000576 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf803 2.46568541903 0 83.904499675 1.2832504875000126 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf804 4.74094044098 0 82.86699975 2.839500375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf805 4.11281886651 0 85.239499525 0.22050047500000575 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf806 4.64388818634 0 83.799499325 1.4407510125000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf807 4.78711530883 0 83.751000675 1.5134989875000073 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf808 4.63658695371 0 83.893499875 1.2997501875000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf809 1.97124355876 0 83.85899975 1.3515003750000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf810 3.98499520747 0 83.9450001 1.2224998500000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf811 3.52741090964 0 84.077000975 1.0244985375000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf812 2.99080920887 0 84.590000025 0.5699999750000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf813 4.60761017185 0 83.8474998 1.3687503000000163 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf814 4.06183643479 0 84.022999375 1.105500937500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf815 3.34724520741 0 84.5710008 0.588999200000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf816 4.00665156404 0 83.653000675 1.6604989875000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf817 4.23690511031 0 83.977000425 1.1744993625000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf818 2.71542297875 0 84.7090005 0.4509995000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf819 3.15731364232 0 84.37249985 0.5812502250000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf820 3.48571779877 0 84.40899965 0.5265005250000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf821 3.51061454803 0 84.460999725 0.6990002750000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf822 3.51061454803 0 84.4284992 0.4972512000000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf823 3.25488617683 0 84.582499525 0.5775004750000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf824 2.79256263316 0 84.7004999 0.4595001000000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf825 2.52912274255 0 84.343999825 0.6240002625000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf826 2.57325940549 0 85.587500225 -0.1275002249999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf827 2.08260997416 0 85.6949997 -0.23499969999999165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf828 2.71542297875 0 84.482500075 0.6774999250000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf829 2.15883788221 0 84.616500025 0.5434999750000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf830 3.61386264477 0 84.2965002 0.6952497000000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ 
-+++++ -conf831 3.36634957954 0 84.5810001 0.578999900000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf832 2.48647461628 0 84.261000025 0.7484999625000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf833 3.25488617683 0 84.6624995 0.49750050000000956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf834 2.79256263316 0 84.6570003 0.502999699999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf835 2.81925910919 0 85.500000125 -0.04000012499999456 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf836 2.21727076111 0 84.65950015 0.5004998500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf837 2.90250153505 0 85.5304994 -0.07049939999999139 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf838 3.38956467165 0 84.506500075 0.6534999249999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf839 3.21935410129 0 84.40149955 0.5377506750000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf840 2.23579750603 0 84.38950035 0.5557494749999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf841 3.38956467165 0 84.5390001 0.6209999000000096 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf842 2.66632133899 0 85.676500775 -0.2165007749999887 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf843 2.81925910919 0 84.6275007 0.5324993000000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf844 1.70856970404 0 85.419998 0.04000199999999837 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf845 3.52741090964 0 84.5234993 0.6365007000000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf846 4.50209909388 0 83.777499825 1.4737502624999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf847 5.03217204616 0 83.9095009 1.2757486500000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf848 4.92375419354 0 84.24299925 0.7755011250000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf849 3.46931522498 0 84.1740002 0.8789997000000156 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf850 4.27374553867 0 83.932 1.2420000000000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf851 4.29300531776 0 84.79499985 0.36500015000000874 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax 
fp16 1 ------ -+++++ -conf852 5.20997422309 0 83.6445001 1.6732498500000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf853 4.68074200425 0 83.96350045 1.1947493250000107 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf854 4.10709103894 0 84.11049995 0.9742500750000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf855 3.70045496902 0 85.319000275 0.14099972500001173 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf856 2.50336009449 0 83.910500825 1.274248762500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf857 4.23690511031 0 83.625999475 1.701000787500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf858 4.8661051796 0 83.69950015 1.590749774999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf859 2.28075482883 0 84.09450065 0.9982490250000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf860 5.02441943023 0 84.301500975 0.6877485375000134 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf861 5.11150591832 0 84.06200025 1.0469996250000122 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf862 4.42174155156 0 84.074000375 1.028999437500012 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 
gpu softmax fp16 1 ------ -+++++ -conf863 3.88013266186 0 84.0149998 1.1175003000000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf864 3.42944293235 0 84.10900035 0.9764994750000042 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf865 3.62274145966 0 84.2265001 0.8002498500000144 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf866 3.3549420805 0 85.2805001 0.17949990000000754 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf867 5.29506075557 0 84.064000875 1.0439986875000002 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf868 1.97658461885 0 84.076500575 1.025249137500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf869 3.65459743556 0 84.115999575 0.9660006375000023 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf870 4.67403368929 0 84.097999925 0.9930001125000132 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf871 2.55100017838 0 84.878999625 0.2810003749999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf872 4.81915540131 0 84.227499325 0.7987510125000128 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf873 3.34760900076 0 83.82450065 1.403249025000001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf874 5.11150591832 0 84.492499725 0.6675002750000033 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf875 2.46382267142 0 85.919500175 -0.4595001749999909 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf876 4.94025067672 0 84.42749955 0.4987506750000179 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf877 5.20166453319 0 84.06699965 1.0395005250000082 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf878 4.85885544059 0 84.432499275 0.4912510875000109 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf879 2.9200817227 0 85.785500925 -0.32550092499998867 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf880 4.46188167251 0 84.607499625 0.5525003750000025 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf881 3.39002063361 0 85.37800025 0.08199975000000564 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf882 4.85885544059 0 84.611500325 0.5484996750000107 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf883 4.23690511031 0 83.910499925 1.2742501125000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf884 3.89600156508 0 84.11800005 0.9629999249999983 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 
promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf885 4.94025067672 0 84.202500325 0.8362495125000038 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf886 5.20166453319 0 84.29650075 0.6952488749999972 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf887 1.89161252668 0 83.965499075 1.1917513875000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf888 4.78009885505 0 84.21499925 0.8175011249999997 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf889 5.1195298246 0 83.801499525 1.4377507125000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf890 4.749306487 0 83.99499935 1.147500975000007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf891 4.749306487 0 84.472999575 0.6870004250000022 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf892 4.85885544059 0 84.128499925 0.9472501125000079 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf893 4.63658695371 0 83.60299915 1.7355012750000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf894 4.03958106713 0 84.0860001 1.0109998499999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf895 4.0017352815 0 84.55099985 0.6090001500000085 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ 
-conf896 2.12312397991 0 83.715999125 1.5660013125000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf897 3.48571779877 0 84.22900075 0.7964988750000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf898 3.09112547159 0 85.610999925 -0.15099992499999643 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf899 4.94774553187 0 83.7374994 1.5337509000000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf900 4.40131209873 0 83.785500375 1.4617494375000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf901 4.749306487 0 84.460999675 0.6990003250000086 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf902 3.09150467208 0 84.3419994 0.6270008999999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf903 2.1110784986 0 85.57450055 -0.1145005499999911 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf904 3.08174415116 0 84.675499325 0.48450067500000105 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf905 3.61386264477 0 84.1864996 0.8602506000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf906 3.51061454803 0 83.785001375 1.462497937500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf907 3.17129190255 0 84.4570002 0.4544997000000137 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf908 2.96673931882 0 84.529999 0.6300010000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf909 4.48904909382 0 84.50700015 0.652999850000009 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf910 3.52359981675 0 85.515999775 -0.05599977499999226 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf911 3.56620961403 0 83.5610005 1.798499249999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf912 3.79571140468 0 84.180500175 0.8692497374999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf913 5.1195298246 0 83.684500775 1.6132488375000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf914 4.42774475186 0 83.99950025 1.1407496250000122 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf915 4.20069439548 0 83.33799975 2.133000375000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf916 5.48110429858 0 82.92599965 2.7510005250000162 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf917 4.48157410599 0 84.8595009 0.3004991000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise 
swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf918 3.43305296381 0 82.986499725 2.6602504125000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf919 3.07180677592 0 85.713500825 -0.2535008249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf920 2.80851950068 0 83.7929987 1.4505019500000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf921 2.65915985321 0 82.96049895 2.6992515750000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf922 4.36224960626 0 83.6820007 1.6169989500000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf923 2.55100017838 0 83.695000375 1.597499437499998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf924 4.23690511031 0 83.8964995 1.295250750000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf925 3.20891955001 0 83.2314995 2.2927507500000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf926 4.55072975326 0 84.80150085 0.35849915000000865 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf927 2.92553630931 0 83.336999875 2.1345001875000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf928 4.49523657385 0 83.916500425 1.2652493625000147 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf929 5.50155459048 0 83.51249965 1.8712505250000149 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf930 3.67694425138 0 84.235999875 0.7860001875000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf931 4.93119904091 0 82.9615 2.6977500000000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf932 4.0900028821 0 83.89899975 1.2915003750000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf933 4.22476570574 0 83.745500275 1.521749587500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf934 3.80013422222 0 84.2410002 0.7784997000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf935 5.30367177341 0 82.856500275 2.855249587500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf936 3.25488617683 0 84.620499575 0.5395004250000085 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf937 2.96979047613 0 83.305499825 2.1817502625000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf938 2.73557376185 0 83.313000675 2.170498987500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf939 3.43696886837 0 84.61449925 0.5455007500000107 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 
gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf940 2.97272039686 0 83.092500525 2.5012492124999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf941 3.99579404276 0 82.91350005 2.7697499250000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf942 3.80013422222 0 84.844499225 0.3155007749999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf943 2.32388897879 0 83.23450085 2.2882487250000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf944 2.06369518543 0 83.706999925 1.5795001124999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf945 2.65671333449 0 83.801499775 1.4377503375000131 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf946 4.78711530883 0 84.057999775 1.0530003375000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf947 4.55072975326 0 83.11199915 2.4720012750000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf948 4.55072975326 0 84.099999525 0.9900007125000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf949 3.57011348802 0 84.12449885 0.9532517250000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf950 
2.64243046275 0 84.739500725 0.42049927500000595 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf951 4.6659305061 0 83.43800025 1.982999625000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf952 3.23709823473 0 83.22099955 2.3085006750000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf953 2.08260997416 0 85.3160003 0.14399970000000623 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf954 2.50761735877 0 84.571500425 0.5884995750000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf955 2.25112114639 0 85.2259992 0.23400080000000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf956 2.75602585333 0 84.68899955 0.4710004499999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf957 2.90250153505 0 85.53949975 -0.07949974999999937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf958 2.69064816356 0 84.621499775 0.5385002250000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf959 2.93135224398 0 85.440000125 0.019999875000007716 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ 
-+++++ -conf960 2.57325940549 0 85.5850002 -0.12500019999999096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf961 3.51061454803 0 84.41399995 0.519000075000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf962 2.22395915051 0 85.238499825 0.2215001749999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf963 2.79256263316 0 84.66099955 0.4990004500000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf964 3.36634957954 0 84.44199985 0.4770002250000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf965 2.73050807996 0 84.76400015 0.39599985000000404 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf966 2.54220394201 0 84.27050005 0.7342499250000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf967 1.77010128766 0 84.79199985 0.36800015000000885 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf968 1.87955651685 0 84.387500975 0.5587485375000156 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf969 3.51061454803 0 84.443000275 0.47549958750000343 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 
1 ------ -+++++ -conf970 2.57325940549 0 85.5029995 -0.04299949999999625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf971 2.31110203954 0 84.367001075 0.5894983875000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf972 3.04014002515 0 84.583499675 0.5765003250000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf973 2.520476477 0 84.920999525 0.23900047500000599 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf974 2.23579750603 0 84.410500025 0.5242499625000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf975 2.87421319527 0 84.518499575 0.6415004249999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf976 3.46931522498 0 84.362999925 0.5955001125000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf977 2.81925910919 0 85.587499775 -0.1274997749999926 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf978 2.23579750603 0 84.61800015 0.5419998500000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf979 2.09742183942 0 85.799500125 -0.33950012499998933 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 
-7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf980 2.12156410089 0 85.204999775 0.2550002250000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf981 2.25112114639 0 85.313998025 0.1460019750000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf982 2.22395915051 0 85.109500725 0.0504992750000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf983 3.36634957954 0 84.5195002 0.6404998000000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf984 1.88313156795 0 84.220001 0.8099985000000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf985 2.48647461628 0 84.275000025 0.727499962500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf986 3.11726491618 0 84.408999825 0.526500262500015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf987 1.63288729942 0 85.540001 -0.0800009999999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf988 2.46568541903 0 84.273999025 0.7290014625000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf989 2.25801563131 0 84.59349955 0.5665004500000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf990 3.39002063361 0 85.404500225 0.05549977499999842 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf991 4.8661051796 0 83.647499625 1.6687505625000156 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf992 3.42944293235 0 83.4234997 2.0047504500000173 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf993 4.46799443574 0 83.8705004 1.3342494000000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf994 4.05678387799 0 84.88300015 0.27699985000000427 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf995 4.98960168004 0 83.691000175 1.6034997375000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf996 3.3549420805 0 85.6950002 -0.2350001999999904 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf997 4.85885544059 0 84.159999825 0.9000002625000079 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf998 4.42174155156 0 84.0690004 1.036499400000018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf999 4.57189457086 0 83.5935001 1.7497498500000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1000 4.00717282134 0 84.706499675 0.4535003250000017 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 
-4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1001 3.42944293235 0 84.19900035 0.8414994749999991 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1002 4.61551188309 0 84.13349915 0.939751274999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1003 4.56549440949 0 84.69500005 0.4649999500000007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1004 2.45770641547 0 84.9455009 0.21449910000000616 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1005 4.50209909388 0 84.090499975 1.0042500375000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1006 3.62274145966 0 84.25799995 0.7530000750000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1007 3.35149437292 0 85.331499925 0.12850007500000232 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1008 4.42174155156 0 84.472500425 0.6874995749999983 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1009 3.42944293235 0 84.1065001 0.9802498499999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1010 4.17689583184 0 83.5660006 1.7909991000000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1011 2.69064816356 0 85.183000025 0.2769999750000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 
1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1012 3.03418372506 0 84.42499865 0.5025020249999983 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1013 5.06758855666 0 84.20349975 0.8347503749999987 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1014 2.28268924961 0 84.152999325 0.9105010125000135 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1015 4.84289719126 0 84.198500425 0.8422493624999987 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1016 2.95217432031 0 84.621499975 0.538500024999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1017 5.11150591832 0 84.08950015 1.0057497749999982 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1018 5.20166453319 0 84.322499275 0.65625108750001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1019 3.65459743556 0 85.40350105 0.056498950000002435 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1020 5.24794714078 0 84.1174996 0.9637506000000045 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1021 4.48904909382 0 83.950000025 1.2149999625000163 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1022 1.91370764343 0 84.51899985 0.641000150000005 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1023 4.94025067672 0 84.159999475 0.900000787499998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1024 5.20997422309 0 83.6000002 1.7399997000000127 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1025 1.94407886078 0 84.4380006 0.4829991000000149 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1026 4.63000451649 0 84.37299955 0.5805006750000032 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1027 3.48571779877 0 84.536999725 0.623000275000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1028 4.70385465798 0 84.17350025 0.8797496250000023 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1029 3.54863373783 0 85.508500075 -0.04850007499999548 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1030 2.54246041813 0 84.002999975 1.135500037500016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1031 3.28030657478 0 83.732499825 1.5412502625000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1032 4.38883904114 0 84.404000025 0.5339999625000047 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 
add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1033 4.94025067672 0 84.49599915 0.6640008500000022 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1034 2.7823322265 0 83.805999775 1.4310003375000022 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1035 5.29506075557 0 84.063499725 1.044750412500008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1036 2.35939223224 0 84.255000275 0.757499587500007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1037 4.45514111937 0 83.9215 1.2577500000000157 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1038 4.42174155156 0 84.553499025 0.6065009750000115 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1039 4.78711530883 0 83.76300065 1.495499025000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1040 4.77162407078 0 83.67400035 1.6289994750000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1041 4.85885544059 0 84.3159998 0.6660003000000074 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1042 4.63658695371 0 83.69750025 1.593749625000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1043 4.74167028213 0 84.574500575 0.5854994250000033 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 
add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1044 2.94605637042 0 85.681999675 -0.221999674999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1045 2.97872563985 0 84.83500035 0.32499965000000375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1046 4.23690511031 0 83.579999525 1.7700007125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1047 5.11150591832 0 84.190999575 0.853500637499998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1048 2.27543803127 0 83.7610006 1.4984991000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1049 4.70385465798 0 84.326499375 0.6502509375000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1050 3.89600156508 0 84.084000775 1.0139988374999973 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1051 2.75602585333 0 83.7834995 1.4647507500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1052 3.08174415116 0 84.698999775 0.4610002250000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1053 3.23388836206 0 84.3539997 0.6090004500000035 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1054 3.25488617683 0 85.49199935 -0.031999349999995264 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 
promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1055 3.11726491618 0 83.722501 1.5562485000000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1056 4.09568311293 0 84.7879989 0.3720011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1057 4.68074200425 0 82.839501025 2.8807484625000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1058 4.21872205213 0 82.899499925 2.7907501125000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1059 4.63658695371 0 83.758500025 1.5022499625000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1060 4.17689583184 0 83.2039997 2.334000450000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1061 3.59623494226 0 83.256499825 2.255250262499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1062 2.15414066881 0 83.709500275 1.5757495875000131 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1063 4.68074200425 0 83.797000425 1.444499362500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1064 4.23690511031 0 84.951499925 0.208500075000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1065 2.81925910919 0 85.541500325 -0.0815003249999961 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1066 3.18458942266 0 83.054500675 2.558248987500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1067 4.56481779115 0 84.1305011 0.9442483500000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1068 3.51061454803 0 84.48900035 0.6709996500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1069 4.23690511031 0 83.927499375 1.248750937500013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1070 3.58748542243 0 84.0855007 1.0117489500000119 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1071 2.93135224398 0 83.9939995 1.1490007500000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1072 2.93135224398 0 83.29949975 2.1907503750000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1073 3.57011348802 0 84.0904995 1.004250749999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1074 3.32835645011 0 84.032000125 1.0919998125000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1075 4.60761017185 0 83.177500175 2.373749737499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1076 4.60761017185 0 82.949500075 2.715749887500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1077 4.78711530883 0 83.34050005 2.1292499250000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1078 4.49523657385 0 83.710001025 1.5749984625000124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1079 4.52975999063 0 83.673999975 1.6290000375000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1080 4.71064890814 0 83.7225004 1.5562494000000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1081 3.22287236085 0 85.8445007 -0.3845006999999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1082 4.42774475186 0 83.860500775 1.3492488374999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1083 3.59194793631 0 82.866000325 2.840999512500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1084 2.95491207034 0 82.98349935 2.664750975000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1085 4.53672840544 0 83.43900085 1.9814987250000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1086 4.01210248164 0 83.82750075 1.3987488750000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1087 4.93119904091 0 82.779999175 2.970001237500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise 
swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1088 2.94605637042 0 85.023499225 0.13650077500001034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1089 3.61386264477 0 84.471500375 0.6884996250000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1090 4.23690511031 0 85.153500175 0.006499825000000237 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1091 3.73279362334 0 84.19550035 0.8467494750000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1092 3.79036576283 0 83.019999925 2.6100001125000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1093 2.91115808335 0 82.817999575 2.913000637500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1094 4.13009860961 0 83.7584997 1.5022504500000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1095 2.79256263316 0 85.383500175 0.07649982500001046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1096 3.35104871505 0 84.316499925 0.6652501125000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1097 2.54220394201 0 84.5500002 0.6099998000000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1098 2.57325940549 0 85.577000575 -0.11700057499999444 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1099 3.0527280537 0 84.7215004 0.43849960000000865 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1100 2.90250153505 0 85.438500075 0.02149992500001191 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1101 2.90250153505 0 85.4419999 0.018000100000006125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1102 3.36634957954 0 84.4759992 0.6840008000000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1103 2.57325940549 0 85.649500525 -0.18950052499998832 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1104 3.17089287752 0 84.664999575 0.4950004250000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1105 2.23579750603 0 84.4165005 0.5152492500000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1106 3.25488617683 0 84.6055004 0.5544996000000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1107 2.14786504266 0 84.86900085 0.290999149999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1108 2.06800584288 0 85.24100055 0.2189994500000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1109 3.61386264477 0 84.338999775 0.6315003375000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1110 2.71542297875 0 84.4300004 0.49499940000001175 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1111 2.37819629574 0 84.431499375 0.49275093750000565 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1112 2.35917135957 0 84.67300035 0.4869996500000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1113 1.70856970404 0 85.379997 0.0800030000000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1114 3.48571779877 0 84.40549945 0.5317508250000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1115 2.87421319527 0 85.49099965 -0.0309996500000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1116 1.95815465425 0 84.98050025 0.1794997499999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1117 2.520476477 0 84.85250035 0.30749965000000545 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1118 3.02145143763 0 84.340000375 0.629999437500004 -1 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1119 2.93135224398 0 84.60249935 0.5575006500000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1120 2.90250153505 0 85.236000225 0.22399977500000717 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1121 3.04014002515 0 84.53899935 0.6210006500000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1122 3.32835645011 0 84.46 0.45000000000001705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1123 2.08260997416 0 85.3374998 0.12250020000000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1124 2.90250153505 0 84.53899995 0.6210000500000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1125 2.25801563131 0 84.70149975 0.4585002500000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1126 2.34790180657 0 85.076000025 0.08399997500000611 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1127 3.15731364232 0 84.309000375 0.6764994375000128 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1128 3.13716146983 0 84.2914997 0.7027504500000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1129 3.36634957954 0 84.5414993 0.618500700000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1130 2.65671333449 0 85.414500225 0.045499775000007514 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1131 2.25112114639 0 85.206000475 0.25399952500000894 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1132 3.51061454803 0 84.415499925 0.5167501124999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1133 2.86305165382 0 84.520499675 0.6395003250000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1134 2.06800584288 0 85.2094993 0.25050070000000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1135 1.87955651685 0 84.300499575 0.6892506375000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1136 2.52912274255 0 84.443 0.4755000000000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1137 3.21935410129 0 84.363999875 0.5940001875000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1138 4.98960168004 0 83.727999825 1.548000262500011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise 
swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1139 3.88013266186 0 84.2445007 0.7732489500000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1140 4.29866382416 0 83.736000425 1.5359993625000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1141 3.36287839839 0 84.9799994 0.18000060000000817 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1142 4.11336811599 0 84.948000175 0.21199982500000092 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1143 3.13716146983 0 84.58100025 0.5789997500000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1144 3.22639831866 0 84.114499475 0.9682507875000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1145 4.63728501684 0 84.447499875 0.4687501874999995 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1146 3.52359981675 0 85.3645004 0.09549960000000796 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1147 3.36671753766 0 85.755499775 -0.29549977499999897 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1148 4.60110969497 0 84.51249985 0.6475001500000076 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1149 2.996952631 0 84.0304993 1.094251050000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1150 3.62274145966 0 84.2284994 0.7972509000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1151 5.15619113861 0 84.3365008 0.6352488000000136 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1152 3.92713031704 0 85.00750055 0.15249945000000198 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1153 4.92375419354 0 84.06299915 1.0455012750000137 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1154 4.23140792301 0 84.767499175 0.3925008250000076 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1155 3.37064741875 0 84.136499825 0.9352502625000056 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1156 2.90250153505 0 84.498500825 0.6614991750000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1157 3.89600156508 0 84.886999325 0.27300067500000014 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1158 4.81915540131 0 84.243999925 0.774000112500012 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1159 3.15056754597 0 83.5110002 1.8734997000000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1160 2.89145135544 0 84.800000225 0.359999775 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add 
fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1161 2.21913447815 0 84.9034996 0.2565004000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1162 4.53042625525 0 84.059999225 1.0500011625000099 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1163 4.8661051796 0 83.75349945 1.5097508249999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1164 4.42174155156 0 84.588999025 0.5710009749999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1165 2.53372287512 0 85.0210006 0.13899940000001154 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1166 3.89600156508 0 84.07499965 1.0275005250000149 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1167 3.2442204033 0 85.396500425 0.06349957500000586 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1168 3.37064741875 0 83.959000575 1.2014991375000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1169 2.26864111687 0 84.7609998 0.3990002000000118 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1170 4.8992150082 0 84.030000525 1.0949992124999994 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1171 5.06758855666 0 84.252499175 0.7612512375000122 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 
promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1172 3.3138339442 0 83.9520012 1.2119982000000107 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1173 3.27701048398 0 84.43649945 0.4852508250000085 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1174 5.15619113861 0 84.062499425 1.0462508625000098 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1175 3.48199621824 0 83.47000045 1.9349993250000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1176 4.45514111937 0 83.86149985 1.3477502250000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1177 3.67738324523 0 85.15999925 7.500000066551493e-07 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1178 2.95195426078 0 84.16750005 0.8887499250000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1179 4.63728501684 0 84.36899985 0.586500225000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1180 3.79571140468 0 84.061999775 1.0470003375000019 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1181 3.73279362334 0 84.300998875 0.6885016875000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1182 3.06230535434 0 85.63949985 -0.17949985000000196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 
1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1183 3.62274145966 0 84.0994999 0.9907501500000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1184 4.05678387799 0 84.872500425 0.2874995750000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1185 5.06758855666 0 84.19599995 0.8460000750000063 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1186 3.22639831866 0 84.318999525 0.6615007125000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1187 2.73080396045 0 85.858501025 -0.39850102499999024 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1188 5.24794714078 0 84.07850005 1.0222499250000041 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1189 3.66367779072 0 84.929499375 0.23050062499999913 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1190 4.17689583184 0 84.15650035 0.9052494750000051 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1191 4.00665156404 0 84.109999875 0.975000187500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1192 3.49397738577 0 84.228999825 0.7965002625000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1193 3.61386264477 0 83.93299965 1.2405005250000087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1194 4.78009885505 0 83.9950004 1.1474994000000152 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1195 4.94025067672 0 84.21699965 0.8145005249999997 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1196 4.71136945196 0 83.913500275 1.2697495875000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1197 2.77965392715 0 83.822999425 1.4055008624999985 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1198 4.38883904114 0 84.446500575 0.4702491375000051 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1199 3.18148278982 0 85.370500225 0.08949977500000444 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1200 4.70385465798 0 84.15549925 0.9067511249999995 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1201 2.48024552199 0 85.467500325 -0.0075003249999980315 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1202 2.79256263316 0 83.691500675 1.6027489875000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1203 5.20078623125 0 82.904999725 2.7825004125000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1204 3.53595970915 0 82.8145004 2.9182494000000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv 
perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1205 2.83279967723 0 83.692000175 1.6019997375000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1206 2.98180151453 0 83.400999475 2.0385007874999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1207 3.31339824504 0 84.58200015 0.5779998500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1208 3.24779325302 0 83.4039996 2.034000599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1209 4.23690511031 0 83.895500925 1.2967486125000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1210 4.21269566507 0 83.9574997 1.2037504500000082 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1211 4.31754406538 0 85.1849999 0.2750001000000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1212 5.40080120652 0 83.6874994 1.6087509000000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1213 4.26756104107 0 84.1684996 0.8872506000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1214 4.40789063084 0 83.836500375 1.3852494375000077 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1215 2.90250153505 0 85.452999475 0.00700052500000653 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 
3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1216 3.90066133282 0 83.6710012 1.6334981999999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1217 4.09568311293 0 83.6889996 1.606500600000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1218 2.32202027362 0 83.543500275 1.8247495875000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1219 3.9162009563 0 84.6829998 0.47700020000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1220 4.55072975326 0 85.096499475 0.06350052500000347 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1221 4.98960168004 0 83.7640008 1.4939988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1222 3.48571779877 0 84.50499995 0.6550000500000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1223 3.82984987584 0 83.013000225 2.620499662500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1224 3.95294609782 0 83.464500725 1.9432489125000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1225 3.31720456502 0 83.103500175 2.4847497375000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1226 3.97961764859 0 84.113999175 0.9690012375000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1227 4.34938240412 0 83.86949975 1.3357503750000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1228 3.93186488986 0 83.912500025 1.2712499625000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1229 2.80051833724 0 84.850498825 0.3095011750000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1230 4.52975999063 0 83.901999775 1.287000337500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1231 3.29121134217 0 83.7880003 1.457999550000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1232 4.42774475186 0 83.8934997 1.2997504499999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1233 3.84974163103 0 83.78649965 1.4602505250000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1234 2.98180151453 0 83.39149895 2.052751575000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1235 4.95606048827 0 83.6475006 1.6687491000000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1236 5.03217204616 0 83.9230005 1.2554992500000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1237 4.18282015663 0 83.865999325 1.3410010125000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise 
swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1238 4.79489870881 0 83.8415007 1.3777489500000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1239 2.81925910919 0 85.5619991 -0.10199909999998907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1240 2.06800584288 0 85.200000025 0.2599999750000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1241 2.25112114639 0 85.3334991 0.12650090000000774 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1242 2.2528774348 0 84.559500325 0.6004996750000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1243 2.20731326885 0 85.2654997 0.19450029999999857 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1244 1.98450862124 0 84.213499675 0.8197504875000163 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1245 3.25488617683 0 84.619999825 0.5400001750000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1246 1.97124355876 0 84.7954997 0.36450030000001166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1247 2.57325940549 0 85.617999425 
-0.15799942499998848 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1248 2.50761735877 0 84.6404995 0.5195005000000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1249 2.09298780883 0 85.132 0.028000000000000136 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1250 2.22395915051 0 85.2194998 0.240500200000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1251 2.40535258985 0 84.41350025 0.51974962500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1252 2.00610810282 0 85.483500225 -0.0235002249999951 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1253 3.61386264477 0 84.378500175 0.5722497375000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1254 3.61386264477 0 84.35549885 0.6067517250000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1255 2.86305165382 0 84.409499925 0.525750112499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1256 2.69064816356 0 84.656499375 0.5035006250000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1257 2.52912274255 0 
84.3340002 0.6389996999999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1258 2.48647461628 0 84.448000275 0.46799958750001025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1259 2.47811701105 0 84.3744995 0.5782507500000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1260 2.71542297875 0 84.460999525 0.6990004749999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1261 3.38956467165 0 84.596000075 0.5639999249999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1262 3.04014002515 0 84.447499975 0.46875003750000843 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1263 2.87421319527 0 84.63200015 0.527999850000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1264 2.44524097268 0 84.229999375 0.7950009374999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1265 2.81925910919 0 85.5640009 -0.10400089999999124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1266 2.29313137734 0 85.1404991 0.019500900000005594 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1267 2.71542297875 0 85.543500475 
-0.08350047499999674 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1268 2.09742183942 0 85.79849995 -0.33849994999998784 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1269 2.2528774348 0 85.329499975 0.13050002500000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1270 2.15883788221 0 85.187499975 0.27250002500001075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1271 2.20731326885 0 85.132500875 0.027499124999999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1272 2.3293565891 0 84.273499475 0.7297507875000164 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1273 3.51061454803 0 84.3639995 0.5940007499999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1274 2.520476477 0 84.8505002 0.3094998000000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1275 1.9038241963 0 84.779999 0.3800010000000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1276 3.38956467165 0 84.550499525 0.6095004749999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 
1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1277 2.14786504266 0 84.65950045 0.5004995500000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1278 1.72558649193 0 85.040001 0.11999900000000141 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1279 2.61896392029 0 84.386499625 0.5602505625000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1280 3.75178902153 0 84.26000005 0.7499999250000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1281 2.81925910919 0 84.58749985 0.5725001500000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1282 3.36634957954 0 84.516499525 0.6435004750000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1283 3.06230535434 0 83.832 1.3920000000000172 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1284 3.63209225801 0 85.42550045 0.034499550000003876 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1285 2.57325940549 0 85.851500975 -0.3915009749999882 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1286 3.75178902153 0 84.207999775 0.8280003375000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1287 3.48571779877 0 84.564999925 0.5950000750000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1288 4.20126736815 0 84.1924997 0.8512504500000091 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1289 3.28030657478 0 85.403499775 0.0565002250000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1290 5.02441943023 0 84.242999675 0.7755004875000182 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1291 4.29300531776 0 84.7849997 0.37500030000000495 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1292 4.23140792301 0 85.023499825 0.13650017500000333 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1293 3.05626197049 0 84.66499995 0.49500005000001013 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1294 2.88546210335 0 83.666500625 1.6402490625000112 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1295 4.85885544059 0 84.495499775 0.6645002250000062 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1296 2.51407607482 0 83.68499965 1.6125005250000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1297 3.63209225801 0 83.568000225 1.787999662499999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise 
swing_level 4 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1298 3.52741090964 0 85.570000275 -0.11000027499999304 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1299 2.8168240943 0 85.806501225 -0.3465012250000001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1300 3.37064741875 0 83.6190004 1.711499400000001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1301 2.95809675652 0 83.756499825 1.5052502624999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1302 3.3138339442 0 84.3659999 0.5910001499999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1303 3.62274145966 0 84.19999905 0.8400014250000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1304 4.8992150082 0 84.416499575 0.515250637500003 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1305 2.7823322265 0 84.07849965 1.0222505250000111 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1306 4.21929995362 0 84.0539999 1.059000150000017 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1307 2.50569074066 0 85.098000725 0.061999274999999465 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1308 
4.94025067672 0 84.530999025 0.6290009750000053 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1309 2.35356305584 0 84.9864991 0.17350090000000196 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1310 3.44136359344 0 85.404999825 0.055000175000000706 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1311 5.06758855666 0 84.538499825 0.6215001750000028 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1312 2.64011518851 0 84.10400025 0.9839996250000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1313 2.58912177825 0 84.91949895 0.24050105000000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1314 2.95195426078 0 84.015999925 1.1160001125000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1315 5.07547506828 0 83.800500825 1.4392487625000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1316 3.99579404276 0 83.729001 1.5464985000000127 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1317 2.92871983878 0 84.704999725 0.45500027500001183 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1318 4.06183643479 0 83.56849895 1.787251574999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1319 2.64029120901 0 83.71000045 1.5749993250000145 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1320 4.29300531776 0 84.1159999 0.9660001499999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1321 2.95195426078 0 83.826499775 1.4002503375000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1322 4.53042625525 0 84.7080003 0.45199970000001033 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1323 4.98197957863 0 84.40399915 0.5340012750000014 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1324 3.79571140468 0 84.614998875 0.5450011250000074 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1325 2.65915985321 0 85.34999965 0.11000035000000424 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1326 3.37064741875 0 84.3649993 0.592501050000017 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1327 4.17689583184 0 84.12200105 0.9569984250000161 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1328 5.15619113861 0 84.27650015 0.7252497750000018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1329 4.63728501684 0 84.7355007 0.42449930000000224 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise 
swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1330 2.54900634755 0 83.790999225 1.453501162500018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1331 4.23690511031 0 83.6294996 1.6957506000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1332 3.40961783577 0 84.027500375 1.098749437500004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1333 2.79817206231 0 84.89100015 0.2689998500000087 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1334 3.03418372506 0 84.971499875 0.1885001249999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1335 2.81925910919 0 83.92849995 1.2472500750000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1336 4.8661051796 0 83.95100085 1.213498725000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1337 2.94339750184 0 85.747499675 -0.2874996749999951 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1338 3.18458942266 0 83.47799915 1.9230012750000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1339 3.69537408729 0 83.8855 1.3117500000000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1340 
4.98960168004 0 83.982999525 1.1655007125000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1341 3.40559661135 0 85.217000375 0.24299962500000732 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1342 3.52741090964 0 85.022499625 0.13750037500001044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1343 3.37064741875 0 85.420500025 0.039499975000009624 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1344 1.81816529668 0 84.532000175 0.627999825000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1345 3.56620961403 0 83.51199985 1.872000225000015 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1346 2.2494406043 0 85.57499995 -0.11499995000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1347 3.83520142642 0 84.558000875 0.6019991250000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1348 3.17437865535 0 83.872500175 1.3312497375000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1349 3.54863373783 0 85.709999775 -0.24999977499999487 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1350 3.3549420805 0 84.014000875 1.1189986875000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1351 3.18148278982 0 84.84999985 0.3100001500000019 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1352 4.15978483669 0 84.3344996 0.6382506000000063 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1353 3.54863373783 0 83.9290011 1.2464983500000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1354 3.34732604321 0 83.0205 2.60925000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1355 3.46931522498 0 84.52299915 0.6370008500000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1356 2.53782853275 0 83.95850035 1.2022494750000163 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1357 4.2552455881 0 84.7270005 0.43299950000000254 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1358 5.45071234571 0 83.55850015 1.802249775000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1359 4.03958106713 0 83.344000625 2.1239990624999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1360 2.85750332813 0 83.290500225 2.204249662500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1361 2.77422947385 0 83.747999425 1.5180008625000028 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1362 4.74857429339 0 83.670500575 1.6342491374999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1363 4.20069439548 0 83.745500575 1.5217491375000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1364 4.63658695371 0 83.4800001 1.9199998500000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1365 4.38821376777 0 83.62699875 1.69950187500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1366 3.81982297414 0 82.977500375 2.6737494374999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1367 4.23690511031 0 83.86299955 1.345500675000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1368 2.71542297875 0 83.72449995 1.5532500750000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1369 2.77678606216 0 83.733000325 1.5404995124999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1370 2.32202027362 0 84.432000225 0.4919996625000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1371 2.12768675694 0 83.7459999 1.521000150000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 
add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1372 4.75623276948 0 82.9179996 2.763000600000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1373 4.09568311293 0 83.8785009 1.3222486499999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1374 2.88828810173 0 83.65300025 1.660499625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1375 4.02854458864 0 83.9105015 1.274247750000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1376 3.8247770812 0 83.755000275 1.507499587500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1377 2.75345263332 0 83.42750075 1.9987488750000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1378 2.78728389469 0 84.53850085 0.6214991499999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1379 4.53672840544 0 83.794000075 1.4489998875000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1380 2.86589315985 0 84.18250045 0.8662493249999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1381 4.38821376777 0 83.12999975 2.4450003750000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1382 3.23709823473 0 83.0440008 2.5739987999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 
-8 gpu softmax fp16 1 ------ -+++++ -conf1383 3.51061454803 0 83.959999875 1.2000001875000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1384 3.15738556461 0 83.069499225 2.5357511625000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1385 2.27196931651 0 82.832000175 2.891999737500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1386 2.66632133899 0 83.85299985 1.3605002250000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1387 4.16509738546 0 85.040499675 0.11950032499999852 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1388 5.20078623125 0 82.975999675 2.676000487500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1389 3.0527280537 0 85.170500525 0.2894994750000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1390 3.90066133282 0 83.36350095 2.094748575000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1391 3.40522010623 0 84.39700035 0.5444994750000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1392 3.77580687597 0 84.02500015 1.1024997750000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1393 4.09568311293 0 84.999999975 0.16000002500001076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise 
swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1394 4.13009860961 0 83.86299945 1.345500825000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1395 4.03405527942 0 82.968000025 2.6879999625000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1396 4.17689583184 0 83.251499825 2.2627502625000133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1397 3.8247770812 0 84.375999825 0.5760002625000169 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1398 4.59325717765 0 83.679000575 1.6214991375000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1399 5.1018164655 0 83.734499825 1.5382502625000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1400 2.67599909044 0 83.661499825 1.6477502624999971 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1401 3.36634957954 0 84.663999125 0.49600087500000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1402 2.15414066881 0 82.9609996 2.698500600000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1403 2.65915985321 0 83.14099995 2.4285000750000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1404 3.35104871505 0 84.30000015 0.689999775000004 -1 gpu conv fp16 
1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1405 2.14786504266 0 84.893499275 0.26650072500000876 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1406 3.51061454803 0 84.51449985 0.645500149999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1407 1.95815465425 0 85.7240007 -0.2640006999999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1408 2.75602585333 0 85.274500275 0.18549972500001105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1409 2.57325940549 0 84.86099985 0.29900015000000624 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1410 2.22395915051 0 85.109500175 0.05049982500001138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1411 2.87421319527 0 85.44650065 0.013499350000000743 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1412 2.08260997416 0 85.516 -0.056000000000000216 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1413 1.97124355876 0 84.79399855 0.3660014500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 
add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1414 2.57325940549 0 85.588500725 -0.12850072499999782 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1415 2.64243046275 0 85.421999775 0.03800022500000183 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1416 2.21727076111 0 84.684499525 0.47550047499999837 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1417 2.18607703807 0 84.932499675 0.2275003250000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1418 2.52912274255 0 84.3734993 0.5797510499999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1419 2.36674469012 0 84.5410002 0.618999800000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1420 3.0527280537 0 84.729000675 0.4309993250000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1421 2.90250153505 0 85.478500225 -0.018500224999999648 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1422 3.36634957954 0 84.5564997 0.6035003000000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1423 2.79256263316 0 85.561000025 -0.10100002499999333 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 
-8 gpu softmax fp16 1 ------ -+++++ -conf1424 2.09298780883 0 84.5274999 0.63250010000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1425 2.48647461628 0 84.285500375 0.7117494375000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1426 3.04014002515 0 84.543999975 0.6160000249999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1427 1.70856970404 0 85.779999 -0.31999899999999853 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1428 2.90250153505 0 85.56850055 -0.10850054999999087 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1429 3.61386264477 0 84.393499375 0.5497509375000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1430 2.57325940549 0 84.736500275 0.42349972500000777 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1431 3.32835645011 0 84.31750025 0.6637496250000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1432 2.73050807996 0 84.782500225 0.3774997749999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1433 2.520476477 0 84.8880012 0.2719988000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul 
fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1434 2.47811701105 0 84.3615003 0.5977495500000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1435 2.64243046275 0 85.653000575 -0.19300057499998785 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1436 2.57325940549 0 85.5369999 -0.07699989999999274 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1437 2.52912274255 0 84.65049975 0.5095002500000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1438 2.65671333449 0 85.54450035 -0.08450035000000183 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1439 2.42513277215 0 84.3444992 0.6232512000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1440 2.71542297875 0 85.47249925 -0.012499249999993356 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1441 1.93249146701 0 84.870000275 0.2899997250000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1442 2.25801563131 0 84.46749945 0.6925005499999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1443 2.66632133899 0 85.7044995 -0.24449949999999204 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1444 2.40535258985 0 84.415500475 0.516749287500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1445 2.79256263316 0 85.395500225 0.06449977499999876 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1446 3.32835645011 0 84.4009996 0.538500599999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1447 3.11726491618 0 84.439500675 0.48074898750000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1448 2.96673931882 0 84.50599935 0.654000650000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1449 3.21935410129 0 84.388998625 0.5565020625000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1450 2.79256263316 0 84.655999925 0.5040000750000019 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1451 5.03217204616 0 83.61450005 1.718249925000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1452 4.30554062944 0 84.4665009 0.6934991000000054 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1453 2.77703641729 0 84.832000325 0.3279996750000095 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1454 2.95787581321 0 85.771999525 -0.3119995249999931 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1455 4.48904909382 0 83.915499525 1.2667507125000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1456 4.68074200425 0 83.47350045 1.929749325000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1457 4.71136945196 0 84.43299955 0.49050067499999983 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1458 4.45514111937 0 83.878499675 1.322250487500007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1459 2.95787581321 0 85.58850055 -0.1285005500000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1460 4.07305633168 0 83.997499325 1.1437510124999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1461 3.04014002515 0 84.52049955 0.6395004500000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1462 2.56430926229 0 84.332500825 0.6412487625000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1463 3.89600156508 0 84.120999875 0.9585001875000145 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1464 4.39538031203 0 84.456999725 0.4545004125000034 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1465 4.24911447842 0 83.8260002 1.400999700000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise 
swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1466 3.40961783577 0 84.109000375 0.9764994375000171 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1467 3.16076961237 0 85.749999825 -0.28999982499999816 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1468 4.45514111937 0 84.654498875 0.5055011250000035 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1469 3.62274145966 0 84.223499725 0.8047504125000131 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1470 4.35642257412 0 84.718999525 0.44100047500000417 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1471 2.80851950068 0 83.73500055 1.5374991750000149 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1472 3.04014002515 0 84.6999993 0.46000070000000337 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1473 3.19188638661 0 84.054999725 1.057500412500005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1474 2.09164547147 0 85.118001175 0.04199882500000174 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1475 2.37819629574 0 83.572999775 1.7805003375000084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1476 
3.19188638661 0 85.14950045 0.010499550000000135 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1477 2.32576069415 0 85.365999575 0.09400042500000155 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1478 4.63728501684 0 84.182000175 0.8669997375000094 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1479 3.80503735759 0 84.14800005 0.9179999250000179 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1480 4.45514111937 0 84.49699995 0.6630000500000023 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1481 3.06572586948 0 84.7584995 0.4015005000000059 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1482 4.55843741409 0 83.88599915 1.3110012750000024 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1483 4.33659088676 0 83.991000825 1.153498762500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1484 4.8992150082 0 84.44749955 0.46875067500000256 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1485 4.67403368929 0 84.212499625 0.8212505624999977 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1486 5.15619113861 0 84.23949985 0.7807502250000056 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1487 4.85885544059 0 84.102498825 
0.9862517625000109 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1488 4.03958106713 0 83.91450035 1.2682494750000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1489 3.08174415116 0 84.2915001 0.7027498500000178 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1490 3.03083319426 0 85.490499575 -0.03049957499999606 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1491 3.37413466519 0 85.1354994 0.024500600000004646 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1492 3.35149437292 0 84.020998975 1.1085015375000111 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1493 3.57011348802 0 84.199500475 0.8407492875000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1494 4.60761017185 0 83.671999325 1.6320010125000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1495 3.46931522498 0 84.2044989 0.8332516500000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1496 4.18876131089 0 84.02900055 1.0964991749999982 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1497 3.55290898553 0 84.258499 0.752251500000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1498 3.17089287752 0 84.838000275 0.3219997250000063 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1499 3.35104871505 0 84.07699945 1.024500825000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1500 2.48024552199 0 85.013499775 0.14650022500000548 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1501 4.749306487 0 83.945500175 1.221749737499998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1502 3.06572586948 0 84.238999375 0.7815009374999988 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1503 1.82384732846 0 86.0984997 -0.6384996999999999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1504 5.1643560615 0 83.596000275 1.745999587500016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1505 3.15731364232 0 85.310000375 0.1499996250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1506 3.7139896045 0 84.292499775 0.7012503375000136 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1507 5.24794714078 0 84.065999225 1.0410011625000095 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1508 4.78711530883 0 83.602000225 1.7369996625000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add 
fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1509 3.44499877858 0 84.628500675 0.5314993250000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1510 5.20166453319 0 84.00300095 1.1354985750000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1511 3.09112547159 0 85.83099975 -0.3709997499999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1512 4.44842090138 0 84.164500075 0.8932498874999979 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1513 3.87043543601 0 84.331499275 0.6427510875000095 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1514 3.07180677592 0 83.705500425 1.5817493625000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1515 3.12725210162 0 83.8950006 1.2974991000000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1516 4.68074200425 0 83.5494997 1.8157504500000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1517 4.67332451942 0 83.674999825 1.627500262500007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1518 4.8661051796 0 83.5669996 1.7895006000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1519 4.11856269266 0 83.9880001 1.1579998500000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ 
-conf1520 5.50155459048 0 83.54749985 1.8187502250000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1521 4.87414777473 0 83.681000175 1.6184997375000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1522 4.46799443574 0 83.5689989 1.7865016500000124 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1523 3.97961764859 0 83.991001 1.153498500000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1524 2.87421319527 0 85.456000925 0.003999075000007235 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1525 4.33022330151 0 84.01749975 1.1137503750000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1526 4.36224960626 0 84.09600055 0.9959991750000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1527 4.42110686856 0 82.82649955 2.9002506750000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1528 4.55072975326 0 84.85199985 0.3080001500000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1529 3.61386264477 0 84.846000625 0.3139993750000031 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1530 4.23690511031 0 83.9835007 1.1647489500000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1531 3.6587939114 0 83.065500075 2.5417498875000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1532 2.43921651142 0 84.0864992 1.010251199999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1533 2.77945883512 0 84.957500475 0.2024995250000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1534 4.00665156404 0 84.114000825 0.9689987625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1535 5.60613859814 0 83.308 2.1779999999999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1536 2.86589315985 0 83.48950085 1.9057487250000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1537 4.44777853689 0 84.82299955 0.33700044999999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1538 5.45071234571 0 83.319500375 2.1607494375000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1539 2.46568541903 0 84.11799985 0.9630002250000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1540 4.57189457086 0 83.727999675 1.5480004874999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1541 2.50340530846 0 85.493999925 -0.03399992499999199 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf1542 4.71064890814 0 83.237999925 2.2830001125000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1543 5.60613859814 0 83.24800015 2.267999775000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1544 3.40522010623 0 84.3720006 0.5819990999999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1545 4.6004224797 0 83.61300065 1.7204990250000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1546 4.82628706711 0 83.58050005 1.769249925000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1547 4.98960168004 0 83.851999775 1.3620003375000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1548 4.0900028821 0 84.062500975 1.0462485374999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1549 4.48157410599 0 82.94499975 2.722500375000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1550 4.29866382416 0 83.38900035 2.0564994750000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1551 3.28388159021 0 84.4439991 0.47400135000000887 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1552 3.44499877858 0 84.365500175 0.5917497375000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1553 2.40535258985 0 84.689001 0.4709990000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 
gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1554 3.04334411031 0 83.044500375 2.57324943750001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1555 3.98499520747 0 83.183999775 2.364000337500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1556 2.81925910919 0 83.18050005 2.3692499249999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1557 5.1643560615 0 83.18350065 2.364749025000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1558 3.8397168061 0 84.010000775 1.1249988375000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1559 3.40522010623 0 83.8460004 1.3709994000000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1560 5.11065779663 0 82.902000325 2.786999512500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1561 4.11856269266 0 83.042499125 2.5762513124999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1562 2.91691693472 0 83.645999325 1.6710010124999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1563 4.71064890814 0 83.300500225 2.1892496625000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1564 4.51588718524 0 84.844500175 0.31549982500001195 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1565 4.42774475186 0 83.597499875 1.7437501875000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1566 2.28783231137 0 83.186499825 2.36025026250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1567 4.06183643479 0 83.9924988 1.1512517999999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1568 4.68074200425 0 83.5995012 1.7407481999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1569 4.68074200425 0 83.36649875 2.090251875 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf1570 3.09112547159 0 85.651999625 -0.19199962499999684 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt deleted file mode 100644 index cb23d8702f8bd2c916f960f3c2f923695500404a..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/autotuner_data/tuner_promise_confs_batch220_single.txt +++ /dev/null @@ -1,8822 +0,0 @@ -+++++ -conf1 1 0 84.76 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf1 2.2528774348 0 84.934166125 0.22583387500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf2 2.64243046275 0 84.865833075 0.2941669250000075 -1 gpu conv fp16 1 add fp16 
1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf3 2.66632133899 0 84.658333775 0.5016662250000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf4 2.47811701105 0 84.32499935 0.6525009750000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf5 2.71542297875 0 84.612499575 0.5475004250000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf6 2.48647461628 0 84.369999825 0.5850002625000172 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf7 2.18607703807 0 84.913332675 0.24666732499999855 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf8 2.520476477 0 84.761666675 0.3983333250000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf9 2.14786504266 0 84.611666325 0.5483336750000035 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf10 2.08260997416 0 85.52083355 -0.06083355000000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf11 3.26208692053 0 84.43416695 0.48874957499999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf12 
1.93249146701 0 85.0224993 0.13750069999999825 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf13 2.37819629574 0 84.301667075 0.6874993875000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf14 2.59591050603 0 84.633333975 0.5266660250000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf15 3.36634957954 0 84.3450008 0.6224988000000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf16 2.21727076111 0 84.541666225 0.618333775000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf17 2.00610810282 0 85.44833395 0.011666049999999484 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf18 2.69064816356 0 84.45250075 0.46124887500000966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf19 2.46568541903 0 84.61000045 0.549999550000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf20 3.17089287752 0 84.54416695 0.6158330500000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf21 2.23579750603 0 84.8641672 0.295832800000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax 
fp16 1 ------ -+++++ -conf22 3.13716146983 0 84.5308332 0.6291668000000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf23 2.08260997416 0 85.285833225 0.1741667750000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf24 2.48647461628 0 84.761667075 0.3983329249999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf25 2.15883788221 0 84.53166635 0.6283336500000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf26 3.35104871505 0 84.274166675 0.7287499875000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf27 2.35917135957 0 84.475000275 0.6849997250000058 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf28 2.87421319527 0 85.28833285 0.17166715000000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf29 2.29313137734 0 85.28333225 0.1766677500000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf30 2.2528774348 0 84.905833825 0.25416617500000316 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf31 3.04014002515 0 84.4699985 0.6900015000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax 
fp16 1 ------ -+++++ -conf32 2.71542297875 0 84.4300005 0.49499924999999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf33 2.36674469012 0 85.01249975 0.14750025000000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf34 3.15731364232 0 84.24083425 0.7787486249999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf35 2.20731326885 0 84.82416635 0.3358336500000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf36 2.66632133899 0 84.546667125 0.6133328750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf37 3.25488617683 0 84.4799997 0.6800003000000118 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf38 2.18607703807 0 85.082500325 0.0774996750000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf39 3.21935410129 0 84.4591669 0.4512496500000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf40 2.81925910919 0 85.035832975 0.12416702499999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf41 2.22395915051 0 84.682500925 0.47749907500000577 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu 
mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf42 2.93135224398 0 85.0933329 0.06666710000001219 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf43 2.09742183942 0 85.404999575 0.055000425000000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf44 2.64243046275 0 84.740832575 0.41916742500000626 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf45 2.90250153505 0 84.2641668 0.7437498000000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf46 3.48571779877 0 84.3499996 0.6150006000000019 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf47 2.64243046275 0 85.1216655 0.03833449999999916 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf48 2.03940354341 0 84.938333375 0.221666625000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf49 2.47811701105 0 84.36416765 0.5937485250000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf50 2.65671333449 0 85.232499325 0.22750067499999888 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf51 2.81925910919 0 84.83916685 0.3208331500000071 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 
-6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf52 2.73050807996 0 85.11000015 0.0499998500000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf53 2.22395915051 0 85.0250004 0.134999600000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf54 2.46568541903 0 84.3908332 0.5537502000000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf55 1.79160865678 0 85.5 -0.03999999999999487 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf56 2.2528774348 0 84.78166645 0.37833355000000213 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf57 2.32202027362 0 84.4308331 0.4937503500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf58 2.31110203954 0 84.2150001 0.8174998500000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf59 2.35917135957 0 84.502500525 0.657499475000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf60 2.66632133899 0 85.018332425 0.14166757500000815 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf61 2.36674469012 0 84.307500125 0.6787498125000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 
add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf62 1.83623037965 0 84.6108326 0.5491674000000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf63 2.18607703807 0 84.27083445 0.7337483250000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf64 2.93135224398 0 84.974166625 0.18583337500000996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf65 2.59591050603 0 84.765834025 0.3941659750000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf66 2.25801563131 0 84.43833315 0.4825002749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf67 2.37819629574 0 84.24416645 0.773750325000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf68 2.57325940549 0 84.70416665 0.45583335000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf69 2.87421319527 0 84.526665275 0.633334725000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf70 2.23579750603 0 84.715833025 0.44416697500001023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf71 2.09742183942 0 85.1474996 
0.012500400000001854 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf72 2.09742183942 0 84.730833175 0.42916682500000436 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf73 2.3293565891 0 84.744999675 0.4150003250000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf74 1.93249146701 0 84.477499425 0.6825005750000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf75 2.61896392029 0 84.756666625 0.4033333750000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf76 2.18607703807 0 84.668333825 0.4916661750000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf77 2.00610810282 0 84.955833075 0.2041669250000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf78 2.90250153505 0 85.110833625 0.0491663750000072 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf79 2.81925910919 0 84.373333525 0.5799997124999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf80 2.22395915051 0 84.788333875 0.37166612499999874 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 
add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf81 2.42513277215 0 84.4791674 0.6808326000000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf82 2.69064816356 0 84.63916665 0.5208333499999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf83 2.75602585333 0 84.75333345 0.4066665500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf84 2.90250153505 0 85.141666525 0.018333475000000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf85 2.50761735877 0 84.514999 0.645001000000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf86 2.65671333449 0 84.705833475 0.45416652499999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf87 2.73050807996 0 84.62583325 0.5341667500000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf88 1.97124355876 0 84.4249988 0.5025018000000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf89 2.90250153505 0 84.8575001 0.30249990000000937 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf90 2.75602585333 0 84.3941676 0.5487486000000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf91 2.520476477 0 85.418333225 0.04166677500000732 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf92 2.31110203954 0 84.375833875 0.5762491875000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf93 2.75602585333 0 85.0075001 0.15249990000000369 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf94 3.21935410129 0 84.358333425 0.6024998625000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf95 2.39753056999 0 84.707500725 0.4524992750000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf96 2.36674469012 0 84.974999525 0.18500047500000394 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf97 2.79256263316 0 85.303333425 0.15666657499999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf98 3.36634957954 0 84.39666595 0.5450010750000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf99 2.40535258985 0 84.7100002 0.44999980000000905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf100 2.44524097268 0 84.6149996 0.5450004000000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf101 2.15883788221 0 84.639999725 0.5200002750000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf102 2.73050807996 0 85.433333075 0.026666925000012054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf103 2.73050807996 0 84.5925011 0.5674988999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf104 1.70856970404 0 85.433327 0.026672999999999558 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf105 2.73050807996 0 84.847499875 0.31250012500000823 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf106 2.23579750603 0 84.71749915 0.4425008500000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf107 2.66632133899 0 84.71916585 0.44083415000000914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf108 2.42513277215 0 84.28750065 0.7087490250000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf109 2.75602585333 0 85.206666825 0.2533331750000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf110 5.11150591832 0 83.79833355 1.4424996750000147 -1 gpu conv perf 23 add 
fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf111 4.48904909382 0 84.320000225 0.659999662500006
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf112 2.65915985321 0 84.07833405 1.022498925000015
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf113 2.55546827703 0 83.699167225 1.591249162500013
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf114 2.70545850471 0 85.671666075 -0.2116660749999994
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf115 3.39002063361 0 85.46 1.1379786002407855e-14
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf116 3.92713031704 0 84.6025005 0.5574995000000001
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf117 3.44499877858 0 85.18999945 0.27000055000000317
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf118 1.82049363128 0 84.433334 0.4899990000000045
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf119 3.55760541076 0 85.04583425 0.1141657500000065
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf120 2.37454984713 0 85.641666 -0.1816659999999956
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf121 4.09568311293 0 83.7225 1.5562500000000128
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 7
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf122 4.81915540131 0 84.38833355 0.5574996750000096
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf123 2.92269861556 0 84.120833175 0.9587502375000057
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf124 2.9489396246 0 83.763333125 1.4950003125000038
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf125 4.11336811599 0 83.815833275 1.4162500875000035
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf126 5.24794714078 0 83.785832925 1.4612506125000166
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf127 3.02145143763 0 84.1524996 0.9112506000000096
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf128 4.68889675944 0 84.0608331 1.048750350000013
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf129 3.11726491618 0 84.2183331 0.8125003500000147
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf130 3.42944293235 0 84.7625002 0.39749979999999996
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf131 2.86589315985 0 85.57166635 -0.11166634999999586
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf132 1.9596080833 0 84.298332425 0.6925013625000105
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf133 3.21617930472 0 84.1633334 0.8949999000000091
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf134 4.94025067672 0 84.1550001 0.9074998500000149
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf135 3.06572586948 0 84.4866671 0.6733328999999998
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf136 4.00665156404 0 84.274166525 0.7287502125000103
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf137 1.82049363128 0 84.166664 0.8900040000000118
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf138 3.08174415116 0 84.457499125 0.45375131250001033
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf139 3.65459743556 0 84.9066661 0.25333390000000977
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf140 2.14786504266 0 84.365833225 0.5912501625000033
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf141 2.28268924961 0 84.1691664 0.8862504000000158
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf142 3.52359981675 0 85.0375003 0.12249970000000021
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf143 1.81032878247 0 83.433342 1.9899870000000135
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf144 4.33022330151 0 84.0491668 1.0662498000000156
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf145 3.29121134217 0 83.716666775 1.564999837500018
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf146 2.95195426078 0 85.093332675 0.06666732500000594
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf147 3.0750779935 0 85.936666825 -0.4766668249999981
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf148 3.74747795825 0 85.196666675 0.2633333250000021
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf149 2.65915985321 0 84.4224995 0.5062507500000066
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf150 3.1142881864 0 85.5100001 -0.05000009999999405
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf151 1.9596080833 0 85.2125008 0.2474992000000043
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf152 3.12425627658 0 84.286666275 0.710000587500005
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf153 3.38956467165 0 84.857499125 0.3025008750000012
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf154 3.42944293235 0 83.970834125 1.1837488125000135
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf155 4.78009885505 0 83.987500375 1.1587494375000134
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf156 3.06572586948 0 84.009167425 1.126248862500006
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf157 3.25488617683 0 85.202499925 0.2575000750000072
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf158 3.39383892936 0 84.92416725 0.2358327500000087
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf159 2.71316395691 0 84.80000005 0.35999995000001095
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 5
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf160 4.42174155156 0 83.748334025 1.5174989624999995
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf161 3.65459743556 0 85.078332675 0.08166732500000651
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf162 2.8716823693 0 84.841666425 0.3183335750000055
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf163 5.03217204616 0 83.8383343 1.3824985500000082
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf164 1.58558171041 0 85.099998 0.06000200000000577
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf165 4.68074200425 0 83.841666625 1.3775000625000047
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf166 3.1883620737 0 84.814999575 0.3450004250000035
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf167 4.57189457086 0 83.503334175 1.8849987374999984
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf168 4.8992150082 0 83.6158339 1.71624915000001
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf169 5.06758855666 0 83.938332725 1.232500912500015
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf170 3.70045496902 0 85.094167475 0.06583252499999903
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf171 4.85009935161 0 83.74416815 1.5237477750000181
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf172 1.88313156795 0 84.26667 0.7399950000000004
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf173 2.32388897879 0 86.20500055 -0.745000549999989
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf174 2.70545850471 0 85.20833245 0.2516675500000048
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf175 3.40522010623 0 84.827500525 0.3324994750000002
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf176 4.11856269266 0 83.90916675 1.276249875000012
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf177 3.34760900076 0 84.98499845 0.17500154999999895
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf178 4.39538031203 0 84.335832975 0.636250537500004
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf179 1.91380012618 0 85.55583345 -0.09583344999998927
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf180 4.42174155156 0 83.86583365 1.3412495250000092
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf181 5.15619113861 0 83.95916635 1.201250475000002
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf182 3.20891955001 0 85.7524994 -0.2924993999999998
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf183 4.1422563221 0 84.29833355 0.6924996750000147
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf184 2.42917048367 0 84.279999875 0.7200001875000055
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf185 2.34066577715 0 84.152500325 0.9112495124999995
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf186 5.11150591832 0 83.97500015 1.1774997750000082
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf187 3.06891619039 0 84.6966661 0.4633339000000035
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf188 3.11726491618 0 85.584167775 -0.12416777499999226
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf189 2.40366321762 0 84.94250095 0.21749905000000924
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf190 3.10452256642 0 85.5149996 -0.05499959999999077
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 7
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf191 3.27701048398 0 84.047500025 1.0687499625
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf192 5.11150591832 0 84.306666725 0.6799999125000085
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf193 4.49523657385 0 83.747500025 1.5187499625000171
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf194 4.0126251583 0 84.09333305 1.0000004250000103
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 7
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf195 3.39354810299 0 85.572498425 -0.11249842500000112
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf196 4.68889675944 0 84.13833325 0.9325001250000042
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf197 2.23931936018 0 84.62500045 0.5349995500000034
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf198 2.86615984136 0 84.4133332 0.5200002000000126
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf199 3.74747795825 0 83.530000675 1.8449989875000128
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf200 5.20166453319 0 83.9658329 1.1912506500000148
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf201 3.19188638661 0 84.4616671 0.6983329000000055
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf202 2.32402535693 0 85.540833225 -0.08083322499999496
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf203 3.27302525995 0 85.705001075 -0.2450010749999933
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf204 3.85022285799 0 84.412500525 0.5212492125000097
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf205 2.53594802291 0 84.8191675 0.3408324999999991
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf206 2.32202027362 0 85.30666595 0.15333405000000938
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf207 4.46188167251 0 84.245833625 0.7712495625000031
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf208 3.05626197049 0 84.35000015 0.6149997750000082
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf209 2.54246041813 0 85.59333365 -0.13333365000000014
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf210 2.36674469012 0 85.26166675 0.19833325000000174
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf211 4.85885544059 0 84.03500025 1.0874996250000137
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf212 3.01527286358 0 85.25166645 0.20833355000000325
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf213 3.77626979527 0 84.515833225 0.6441667750000107
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 5
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf214 2.48024552199 0 84.81833225 0.3416677500000077
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf215 3.78064739543 0 83.686667275 1.6099990875000003
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 7
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf216 4.0017352815 0 84.64333385 0.5166661500000004
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf217 3.30967967793 0 85.17083325 0.28916675000000397
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf218 4.1480827235 0 84.016667025 1.114999462500002
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 promise swing_level 7
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf219 3.89600156508 0 84.155833625 0.9062495625000082
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 promise swing_level 7
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf220 3.40559661135 0 85.3216663 0.13833370000000117
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf221 3.07821945478 0 83.707499875 1.5787501875000132
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf222 3.52741090964 0 84.353332275 0.6100015875000082
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 5
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf223 2.36674469012 0 85.527500125 -0.06750012499999797
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf224 2.1110784986 0 85.359999775 0.10000022499999944
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf225 4.21929995362 0 84.450000275 0.46499958750001724
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf226 2.64029120901 0 84.650000775 0.5099992250000099
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf227 4.60110969497 0 84.265832875 0.7412506875000062
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf228 5.1195298246 0 83.73333345 1.5399998250000024
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf229 2.57325940549 0 85.131667 0.028333000000012043
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf230 3.80503735759 0 83.965000175 1.1924997375000075
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf231 4.78009885505 0 83.742500525 1.5262492125000122
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf232 2.79017349628 0 85.048331775 0.11166822500001106
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf233 2.85479601915 0 84.0425001 1.0762498500000106
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf234 2.45162045063 0 84.321666925 0.6574996125000041
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf235 3.51061454803 0 83.744166775 1.523749837500013
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf236 2.46382267142 0 85.251666475 0.20833352500001184
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf237 3.40522010623 0 84.095 0.9975000000000094
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf238 4.48904909382 0 84.18750025 0.8587496250000086
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf239 3.50267448344 0 83.673333375 1.6299999375000098
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf240 3.89600156508 0 84.308333675 0.6774994875000075
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf241 4.42774475186 0 83.916665825 1.2650012625000144
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf242 4.51654937482 0 83.63583415 1.686248775000017
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf243 3.42944293235 0 83.6958342 1.596248700000018
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf244 3.57011348802 0 85.546666925 -0.08666692499999157
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf245 4.11336811599 0 84.458333575 0.45249963750001143
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf246 4.1715531621 0 84.187499775 0.8587503374999983
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf247 4.55843741409 0 83.9508335 1.2137497500000052
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf248 2.54900634755 0 83.861666275 1.3475005875000008
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf249 2.09298780883 0 83.450833675 1.96374948750001
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf250 3.27701048398 0 83.801666175 1.4375007375000166
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf251 2.51832413986 0 84.13666705 0.934999425000008
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf252 3.16448600486 0 83.7658337 1.491249450000005
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf253 3.32835645011 0 83.675832875 1.6262506875000113
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf254 2.33141358263 0 83.9124993 1.2712510500000178
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf255 4.27374553867 0 83.818334725 1.4124979125000081
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 7
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf256 5.20166453319 0 84.0674998 1.038750300000018
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf257 4.23140792301 0 84.044167075 1.0737493875000013
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf258 1.92507021073 0 84.9625009 0.19749910000001025
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf259 3.65459743556 0 84.96083255 0.1991674499999988
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf260 2.13391055752 0 85.82416905 -0.3641690499999896
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf261 3.2442204033 0 85.1583328 0.0016672000000085285
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf262 2.59819756789 0 83.854999825 1.357500262500018
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf263 2.49704621765 0 85.134999525 0.02500047500000735
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf264 2.34066577715 0 85.0483337 0.11166630000000455
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf265 3.38956467165 0 84.955000075 0.2049999250000042
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf266 2.74581611742 0 84.07250065 1.0312490250000153
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf267 4.94025067672 0 83.68916795 1.6062480750000105
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 5
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf268 3.26966664615 0 85.507500275 -0.04750027499999304
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf269 3.19188638661 0 85.40666595 0.053334050000000854
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf270 2.36674469012 0 85.0666672 0.09333280000000743
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf271 3.00947885334 0 84.567500175 0.5924998249999988
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf272 3.08174415116 0 84.0216675 1.1074987499999978
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf273 2.58475221483 0 83.989166175 1.1562507375000166
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf274 3.8653624219 0 84.400832725 0.5387509125000065
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 6
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 4
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf275 3.70045496902 0 84.84583205 0.31416795000000663
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf276 3.25488617683 0 85.282499875 0.17750012500000595
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf277 4.29866382416 0 83.777499575 1.473750637500018
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf278 2.28075482883 0 85.216665825 0.24333417499999827
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf279 4.46188167251 0 84.554166075 0.6058339250000074
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf280 2.59384588291 0 83.8424995 1.376250750000004
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf281 3.57011348802 0 85.430833625 0.0291663749999998
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf282 4.749306487 0 84.22583335 0.8012499750000046
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf283 2.13553800524 0 85.10166645 0.05833355000000895
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf284 2.46382267142 0 83.962499225 1.1962511625000047
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf285 4.78009885505 0 84.114167425 0.9687488625
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf286 3.20576529027 0 84.00249995 1.1362500750000066
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf287 3.67738324523 0 85.02916775 0.13083225000000598
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf288 3.86487740325 0 84.034167125 1.0887493125000134
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 4
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf289 3.370196653 0 83.727500375 1.5487494374999997
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf290 3.00947885334 0 84.167499375 0.8887509374999993
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv samp 32 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf291 3.21617930472 0 85.604167575 -0.14416757500000016
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf292 2.68330363357 0 83.835833525 1.3862497125000104
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 4
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf293 2.2528774348 0 84.1324997 0.9412504500000125
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf294 2.56208144611 0 83.70083425 1.588748625000008
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf295 4.23690511031 0 84.03583255 1.0862511750000152
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf296 3.37064741875 0 83.596665525 1.7450017124999988
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf297 3.04987324632 0 84.694167575 0.46583242500001065
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf298 4.1715531621 0 84.387501175 0.5587482375000121
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 7
-3 promise swing_level 7
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf299 2.47192966909 0 84.504999275 0.6550007250000022
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 5
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf300 5.07547506828 0 83.71083285 1.5737507250000036
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf301 3.04987324632 0 84.300833725 0.6887494125000018
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf302 3.44136359344 0 85.140833475 0.01916652500001137
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf303 2.35356305584 0 84.982499375 0.17750062500000185
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf304 3.31339824504 0 85.795832875 -0.335832874999997
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 5
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf305 3.39354810299 0 84.9266668 0.23333319999999846
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf306 2.90250153505 0 84.3316671 0.6424993500000014
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 gpu conv fp16 1 add fp16 1 tanh fp16 1
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf307 2.58458352145 0 84.3508339 0.613749150000011
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 6
-4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf308 3.07218125113 0 85.380832525 0.07916747500000271
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf309 4.42774475186 0 83.895833025 1.296250462500005
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 7
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf310 2.46803429972 0 83.817500275 1.413749587500007
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 6
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf311 4.48904909382 0 83.6108329 1.7237506499999995
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 3
-3 promise swing_level 3
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf312 2.53594802291 0 85.054167025 0.10583297500000699
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf313 4.63000451649 0 84.4649997 0.6950002999999981
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 5
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf314 2.68114957789 0 84.820833775 0.33916622500000815
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 3
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf315 3.34760900076 0 85.23750045 0.22249955000000626
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 5
-3 gpu conv samp 31 add fp16 1 tanh fp16 1
-4 promise swing_level 7
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 6
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf316 2.50557750147 0 84.5375006 0.6224994000000038
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1
-3 promise swing_level 4
-4 promise swing_level 3
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf317 4.60761017185 0 83.7708333 1.4837500499999976
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1
-2 promise swing_level 6
-3 promise swing_level 3
-4 promise swing_level 4
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 promise swing_level 3
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf318 2.8605404372 0 84.4941671 0.6658329000000066
-1 gpu conv perf 23 add fp16 1 tanh fp16 1
-2 promise swing_level 4
-3 promise swing_level 3
-4 promise swing_level 6
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1
-6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1
-7 gpu mul fp16 1 add fp16 1
-8 gpu softmax fp16 1
------
-+++++
-conf319 2.30748540935 0 85.6141659
-0.15416589999999813 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf320 4.85885544059 0 84.37666675 0.5749998750000103 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf321 2.02960847562 0 85.007501175 0.15249882500000356 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf322 2.95809675652 0 85.295832475 0.1641675250000077 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf323 2.52912274255 0 83.908333625 1.2774995625000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf324 3.46562856844 0 85.1783329 0.2816671000000042 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf325 4.35642257412 0 84.435832725 0.4862509125000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf326 4.42174155156 0 84.484166675 0.6758333250000078 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf327 3.42944293235 0 85.258334 0.20166600000000018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf328 2.9200817227 0 85.425833725 0.03416627500000119 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf329 2.12312397991 0 85.304999325 0.15500067500000797 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 
1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf330 3.39002063361 0 84.330833225 0.6437501624999982 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf331 2.19091471805 0 84.1649996 0.8925006000000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf332 4.35642257412 0 84.2174994 0.8137509000000165 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf333 3.30640319553 0 83.7333332 1.5400002000000015 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf334 3.35104871505 0 85.510834475 -0.050834474999990664 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf335 2.7823322265 0 84.299167475 0.6912487875000011 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf336 1.79160865678 0 83.333336 2.1399960000000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf337 1.82049363128 0 83.633339 1.6899914999999979 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf338 2.86589315985 0 85.0425013 0.11749870000000728 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf339 4.85885544059 0 84.0366665 1.0850002500000144 -1 gpu conv perf 23 
add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf340 2.73557376185 0 85.156665975 0.003334025000009455 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf341 2.8168240943 0 84.806666 0.35333399999999815 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf342 5.02441943023 0 83.959166925 1.2012496124999998 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf343 4.38883904114 0 84.42666685 0.4999997250000021 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf344 3.97476744493 0 84.278334475 0.7224982875000165 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf345 2.58458352145 0 85.45583365 0.004166350000002705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf346 2.51626683335 0 85.00499975 0.15500025000000905 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf347 3.89600156508 0 83.8199999 1.410000150000009 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf348 4.67403368929 0 84.041666625 1.0775000625000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf349 3.65869733242 0 84.625833975 0.534166024999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf350 3.44499877858 0 84.2925001 0.7012498500000106 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf351 2.33141358263 0 85.58333305 -0.12333304999998801 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf352 4.38883904114 0 83.934167675 1.2387484875000112 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf353 3.04357800698 0 85.43666725 0.023332750000005842 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf354 4.42774475186 0 83.994167575 1.148748637499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf355 4.06183643479 0 83.865833225 1.3412501625000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf356 2.49704621765 0 83.978333525 1.172499712500013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf357 2.29508685841 0 85.124999725 0.03500027500001013 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf358 3.88013266186 0 83.978333325 1.1725000125000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf359 3.30640319553 0 85.405833675 0.05416632500000845 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf360 3.19188638661 0 83.76416665 1.493750024999997 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 
-5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf361 4.11336811599 0 84.49083295 0.6691670500000072 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf362 3.52359981675 0 84.929999375 0.23000062500001095 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf363 3.65869733242 0 84.155833425 0.9062498625000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf364 2.63326868545 0 83.545833975 1.8212490375000172 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf365 2.73835708776 0 84.59083395 0.5691660500000012 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf366 1.94407886078 0 84.159166525 0.9012502125000026 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf367 3.27701048398 0 84.339999725 0.63000041250001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf368 2.8439887133 0 83.61499925 1.7175011250000125 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf369 2.98806898383 0 84.2358331 0.7862503500000173 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf370 5.20166453319 0 83.64583405 1.6712489250000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 
gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf371 2.38597460634 0 85.628333575 -0.16833357499999407 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf372 3.20576529027 0 84.0533323 1.0600015500000168 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf373 3.48571779877 0 85.29583275 0.1641672500000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf374 3.12725210162 0 83.810833375 1.4237499375000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf375 5.00735701676 0 83.70333305 1.5850004250000111 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf376 2.7586587916 0 84.016666475 1.115000287500017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf377 3.59623494226 0 84.02083355 1.1087496749999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf378 3.07218125113 0 84.863333575 0.2966664250000065 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf379 5.15619113861 0 84.0725 1.03125 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf380 3.30640319553 0 83.986665925 1.1600011125000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf381 2.93135224398 0 84.207499925 0.8287501125000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 
-3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf382 4.85885544059 0 83.6624998 1.6462502999999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf383 3.48571779877 0 83.728333825 1.547499262499997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf384 3.39002063361 0 83.503332775 1.8850008375000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf385 4.42174155156 0 84.431667375 0.4924989375000024 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf386 4.67403368929 0 83.978332625 1.172501062500018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf387 3.46562856844 0 85.096666525 0.06333347500000175 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf388 5.1195298246 0 83.626666625 1.7000000625000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf389 3.42944293235 0 83.7425005 1.5262492499999993 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf390 4.1480827235 0 83.877500325 1.323749512500008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf391 2.94627555145 0 83.535000025 1.8374999625000044 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf392 3.48199621824 0 85.194167075 0.2658329250000094 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 
-3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf393 5.29506075557 0 83.740000375 1.5299994375000168 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf394 3.39002063361 0 85.356666 0.10333400000000098 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf395 2.61686247873 0 83.76666665 1.4900000250000005 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf396 3.80013422222 0 83.9200003 1.2599995500000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf397 3.84520265677 0 84.26749955 0.7387506750000128 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf398 3.85022285799 0 84.3808335 0.5687497500000163 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf399 1.88204484827 0 84.4200002 0.5099997000000016 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf400 3.20576529027 0 83.7333335 1.5399997500000069 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf401 4.8661051796 0 83.832500275 1.3912495875000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf402 2.55546827703 0 85.2574993 0.2025006999999988 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax 
fp16 1 ------ -+++++ -conf403 2.60969359339 0 85.549166275 -0.08916627499999946 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf404 3.15056754597 0 84.719165625 0.4408343750000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf405 3.44136359344 0 84.57583365 0.5841663499999982 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf406 2.56229458097 0 83.766666575 1.4900001375000045 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf407 4.94025067672 0 84.08000015 1.0199997750000023 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf408 4.0017352815 0 84.265833375 0.7412499375000081 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf409 2.81662374994 0 85.6191671 -0.1591670999999934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf410 2.84104486338 0 83.69583365 1.5962495250000117 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf411 3.84974163103 0 83.6866675 1.6099987500000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf412 3.32835645011 0 85.147500375 0.012499624999998127 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf413 3.20535754348 0 84.6524999 0.50750010000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv samp 32 
add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf414 3.92713031704 0 84.51583365 0.6441663500000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf415 3.65459743556 0 84.525833325 0.6341666750000116 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf416 4.42174155156 0 84.50833295 0.6516670500000089 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf417 3.10452256642 0 84.8075001 0.3524999000000065 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf418 3.56620961403 0 85.090833625 0.06916637500000322 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf419 2.61205424073 0 84.2916662 0.7025007000000159 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf420 2.30954242221 0 83.91000015 1.2749997750000048 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf421 2.32388897879 0 85.4341668 0.025833200000005274 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf422 3.16448600486 0 85.372500475 0.08749952500000974 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf423 2.68114957789 0 83.6916668 1.6024998000000181 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul 
fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf424 3.40166834257 0 83.9191672 1.2612492000000017 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf425 2.32953276127 0 85.559166375 -0.09916637499999864 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf426 2.93135224398 0 84.9558331 0.20416689999999849 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf427 2.98202604677 0 83.9716663 1.1825005500000145 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf428 4.29300531776 0 84.635832975 0.5241670250000056 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf429 4.46188167251 0 83.8666668 1.339999800000001 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf430 2.29313137734 0 83.5649998 1.7925003000000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf431 3.07180677592 0 85.439167075 0.02083292500000483 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf432 3.66367779072 0 83.75999915 1.5000012750000096 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf433 4.94025067672 0 84.2616668 0.747499800000007 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf434 4.85885544059 0 83.932499875 1.2412501875000004 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise 
swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf435 3.28789331155 0 84.913333525 0.24666647500000638 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf436 5.11150591832 0 83.850832525 1.3637512125000057 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf437 3.48571779877 0 84.99583365 0.16416635000001067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf438 2.42513277215 0 85.244167075 0.21583292500001222 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf439 3.17463312783 0 84.883333125 0.2766668750000122 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf440 3.74747795825 0 84.98666605 0.1733339500000085 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf441 2.90250153505 0 84.057500025 1.0537499625000137 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf442 3.04987324632 0 84.386666325 0.560000512500018 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf443 3.05943261456 0 83.763333325 1.4950000125000003 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf444 2.98202604677 0 84.424166475 0.5037502874999973 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf445 3.73279362334 0 84.47166705 0.6883329500000116 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf446 4.28054278574 0 84.365000175 0.592499737499999 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf447 4.749306487 0 84.19499995 0.8475000750000135 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf448 2.41129462935 0 85.37333335 0.08666665000000934 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf449 4.62205314759 0 84.094166925 0.9987496125000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf450 3.58748542243 0 83.93999855 1.2300021750000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf451 3.35868172117 0 84.358334225 0.6024986625000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf452 3.2442204033 0 83.892500775 1.3012488375000046 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf453 3.47749717017 0 84.054167525 1.0587487125000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf454 4.6004224797 0 83.909999675 1.2750004875000158 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf455 2.74581611742 0 83.711667625 1.5724985625000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu 
conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf456 3.55728584143 0 82.854999725 2.857500412500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf457 4.40789063084 0 84.004166675 1.1337499875000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf458 3.23000726883 0 83.2958329 2.1962506500000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf459 3.90066133282 0 83.319168 2.1612480000000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf460 4.68074200425 0 83.734165525 1.5387517125000159 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf461 3.25488617683 0 84.3124998 0.6712503000000112 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf462 4.17689583184 0 84.9249996 0.23500039999999844 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf463 3.80013422222 0 84.0683336 1.0374996000000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf464 3.80013422222 0 83.8733328 1.3300008000000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf465 3.53595970915 0 83.400833725 2.0387494125000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf466 4.56481779115 0 83.8733324 1.3300014000000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 
gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf467 4.27374553867 0 83.748333725 1.5174994125000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf468 3.0122585054 0 83.455833075 1.9562503875000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf469 4.78711530883 0 83.65750105 1.6537484250000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf470 2.69064816356 0 83.47416785 1.9287482250000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf471 2.97872563985 0 84.6950001 0.4649999000000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf472 2.96979047613 0 83.0250004 2.6024994000000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf473 3.12725210162 0 83.171666475 2.382500287500015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf474 3.38956467165 0 85.40000015 0.05999985000000835 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf475 5.50155459048 0 83.673333075 1.6300003875000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf476 4.56481779115 0 83.7641668 1.4937498000000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf477 4.49523657385 0 83.826667975 1.3999980374999978 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf478 4.17689583184 0 83.59416615 1.7487507749999978 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf479 4.77162407078 0 83.575000575 1.7774991375000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf480 3.84974163103 0 83.948332575 1.2175011375000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf481 4.36224960626 0 83.728333275 1.547500087500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf482 3.61386264477 0 83.7358343 1.5362485500000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf483 5.1195298246 0 83.2549999 2.2575001500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf484 3.51061454803 0 84.35999905 0.6000014250000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf485 4.68074200425 0 84.0499998 1.0650003000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf486 3.70465846079 0 84.597500425 0.5624995749999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf487 2.99080920887 0 83.2541668 2.258749800000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf488 3.31339824504 0 84.869167175 0.29083282500000396 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf489 3.0527280537 0 84.244166775 0.7737498375000129 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf490 4.49523657385 0 83.6258349 1.7012476500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf491 2.03940354341 0 83.530000975 1.8449985375000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf492 3.30234620526 0 83.769999675 1.4850004875000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf493 4.11856269266 0 82.9658334 2.6912499000000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf494 2.56430926229 0 83.56583275 1.79125087500001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf495 4.06183643479 0 83.384999475 2.0625007875000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf496 3.54436876665 0 83.98666645 1.1600003250000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf497 4.49523657385 0 84.877499925 0.28250007500001006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf498 3.07180677592 0 84.006665775 1.1300013375000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu 
mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf499 4.41448885793 0 84.739167225 0.4208327750000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf500 4.8661051796 0 83.76833265 1.4875010250000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf501 3.03083319426 0 83.9025001 1.2862498500000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf502 4.39475317355 0 83.85416775 1.3587483750000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf503 3.13382775829 0 83.087500375 2.5087494375000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf504 4.8661051796 0 83.73499935 1.5375009750000146 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf505 4.40131209873 0 83.6399992 1.6800011999999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf506 4.71064890814 0 83.84750095 1.3687485750000121 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf507 2.66150866558 0 83.75666565 1.5050015250000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf508 2.86589315985 0 83.689166825 1.6062497625000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf509 4.53672840544 0 83.38583315 2.0612502750000132 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf510 3.39354810299 0 83.37083375 2.0837493750000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf511 4.29866382416 0 83.6133328 1.7200008000000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf512 2.64960329927 0 83.56416675 1.7937498750000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf513 5.1195298246 0 83.70499985 1.5825002250000182 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf514 4.62205314759 0 84.531667025 0.6283329750000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf515 3.51061454803 0 84.77166675 0.38833325000001084 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf516 4.56481779115 0 83.193332675 2.3500009875000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf517 4.47477396827 0 83.7516658 1.512501300000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf518 3.80013422222 0 83.5333328 1.8400008000000128 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf519 3.29121134217 0 83.777499575 1.473750637500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf520 4.17098826512 0 84.67083305 0.4891669500000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 3 -7 
gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf521 4.42774475186 0 83.91999975 1.2600003750000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf522 4.98960168004 0 83.666666775 1.6399998375000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf523 5.40080120652 0 83.67083355 1.6337496750000113 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf524 4.23690511031 0 84.887500325 0.2724996750000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf525 2.93433101084 0 84.92166715 0.23833285000000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf526 4.68074200425 0 83.80833305 1.4275004250000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf527 4.1533654053 0 83.0999996 2.490000600000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf528 3.52741090964 0 84.39583325 0.5462501250000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf529 3.29121134217 0 85.70333195 -0.24333195000000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf530 4.8661051796 0 83.547500225 1.818749662500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf531 3.15056754597 0 84.0058332 1.1312502000000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise 
swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf532 4.32387438839 0 82.97166645 2.6825003250000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf533 2.80584737113 0 83.8400008 1.3799988000000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf534 3.8397168061 0 83.173333575 2.3799996375000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf535 4.00665156404 0 84.0058329 1.1312506500000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf536 5.60613859814 0 83.570832975 1.783750537500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf537 2.49282474144 0 83.272499025 2.231251462500005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf538 3.99579404276 0 83.340832675 2.1287509875000055 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf539 3.3549420805 0 83.699166625 1.5912500625000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf540 4.60761017185 0 83.509999925 1.875000112500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf541 2.39753056999 0 83.39333175 2.0500023750000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf542 2.99080920887 0 83.24750015 2.2687497750000176 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add 
fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf543 3.51061454803 0 84.308333575 0.6774996374999986 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf544 4.27374553867 0 83.973333025 1.1800004625000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf545 4.33022330151 0 83.722499325 1.556251012500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf546 4.56481779115 0 83.53500085 1.8374987250000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf547 4.09568311293 0 84.77583305 0.38416695000000234 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf548 2.92269861556 0 83.676667275 1.624999087500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf549 3.90066133282 0 84.114167575 0.9687486375000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf550 2.30748540935 0 85.19333455 0.26666545000000214 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf551 3.81982297414 0 83.2883327 2.2075009500000107 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf552 2.48647461628 0 84.136666175 0.9350007375000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf553 4.06183643479 0 84.096666475 0.9950002874999981 -1 gpu conv fp16 1 add 
fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf554 3.29121134217 0 83.677498825 1.6237517625000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf555 4.62205314759 0 83.57666685 1.774999725000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf556 4.42774475186 0 83.749999975 1.515000037500016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf557 3.46931522498 0 84.18249955 0.8662506750000034 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf558 2.16999135568 0 84.5841665 0.5758335000000102 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf559 3.80013422222 0 83.679166625 1.6212500625000175 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf560 4.48157410599 0 83.7274995 1.5487507500000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf561 4.23690511031 0 84.0941674 0.9987489000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf562 5.40080120652 0 83.426666225 2.0000006624999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf563 4.36224960626 0 83.5524996 1.811250600000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf564 5.19163058913 0 83.597499825 1.7437502625000079 -1 gpu conv fp16 1 add fp16 
1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf565 3.57877837384 0 84.496667225 0.6633327750000092 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf566 3.93186488986 0 83.839999775 1.3800003375000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf567 4.41448885793 0 83.87999955 1.320000675000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf568 3.24057963994 0 84.304166675 0.6837499875000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf569 3.61386264477 0 84.019999425 1.1100008625000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf570 5.1195298246 0 83.2475002 2.2687497000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf571 4.8661051796 0 83.777500025 1.4737499625000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf572 3.39354810299 0 82.96583305 2.691250425000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf573 5.45071234571 0 83.635833125 1.6862503125000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf574 3.47749717017 0 84.45083235 0.46375147500000935 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf575 4.36224960626 0 84.5974989 0.5625011000000001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 
26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf576 3.65869733242 0 85.481666225 -0.021666224999992767 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf577 5.20997422309 0 83.5750004 1.7774994000000177 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf578 3.05595874161 0 83.3591666 2.1012501000000157 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf579 3.80013422222 0 84.399166125 0.5412508125000173 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf580 3.74226721787 0 84.9525001 0.2074999000000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf581 3.24771715301 0 84.51416745 0.645832550000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf582 4.00665156404 0 83.50249935 1.8862509750000171 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf583 2.80051833724 0 84.685833875 0.474166125000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf584 4.56481779115 0 82.9549998 2.7075003000000137 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf585 3.24057963994 0 84.2116667 0.8224999500000152 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf586 4.21269566507 0 83.9266659 1.2500011500000028 
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf587 3.39354810299 0 83.1466669 2.4199996500000083 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf588 5.20997422309 0 83.466666825 1.9399997625000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf589 3.73279362334 0 83.930000175 1.2449997375000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf590 4.18876131089 0 84.155000125 0.9074998125000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf591 3.88524454546 0 84.22166635 0.8075004749999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf592 3.57011348802 0 84.1708333 0.8837500500000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf593 3.84974163103 0 83.740833875 1.5287491874999972 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf594 3.52741090964 0 85.19999995 0.2600000499999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf595 2.0006931511 0 83.368333425 2.087499862500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf596 2.39952284113 0 83.755833075 1.5062503875000104 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu 
softmax fp16 1 ------ -+++++ -conf597 5.03217204616 0 83.551667425 1.8124988625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf598 2.3293565891 0 85.089168 0.07083200000000434 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf599 3.23709823473 0 83.13499875 2.4375018750000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf600 3.09112547159 0 84.906667725 0.25333227499999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf601 4.44108049557 0 83.01749995 2.613750075000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf602 4.71064890814 0 83.3824997 2.0662504500000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf603 4.8661051796 0 83.372500575 2.081249137500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf604 3.19840480446 0 84.269166725 0.7362499124999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf605 4.06183643479 0 83.374165925 2.078751112500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf606 3.46931522498 0 83.883333175 1.3150002375000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf607 3.65869733242 0 84.564166025 0.5958339749999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add 
fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf608 3.84974163103 0 84.50499975 0.655000250000009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf609 3.0060522373 0 83.2666666 2.2400001000000174 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf610 4.29866382416 0 84.190832875 0.8537506875000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf611 2.52912274255 0 84.520834775 0.6391652250000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf612 3.65869733242 0 85.08833295 0.07166705000001061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf613 2.93426889145 0 83.395833075 2.0462503875000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf614 3.72807479022 0 84.123332375 0.9550014375000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf615 5.03217204616 0 83.560833775 1.7987493374999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf616 4.11281886651 0 84.95750045 0.2024995500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf617 3.0750779935 0 85.0858345 0.07416550000000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf618 4.17689583184 0 83.297500675 2.1937489875000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add 
fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf619 2.69556689425 0 84.925000575 0.2349994250000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf620 3.02145143763 0 83.3250005 2.1524992500000053 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf621 3.535869506 0 85.07499905 0.08500095000000274 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf622 3.2442204033 0 84.0949995 0.9975007500000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf623 3.0184247196 0 83.844166 1.3737510000000057 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf624 4.28616846517 0 84.8858338 0.2741662000000048 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf625 4.02854458864 0 83.6858329 1.6112506500000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf626 2.63299356219 0 83.69916615 1.591250775000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf627 3.80013422222 0 83.6391668 1.6812498000000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf628 3.61386264477 0 84.3174999 0.6637501500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf629 4.78711530883 0 83.6666668 1.6399998000000053 -1 gpu 
conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf630 2.93433101084 0 85.0549992 0.10500080000000767 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf631 4.24911447842 0 83.8516655 1.362501750000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf632 5.1195298246 0 83.624166275 1.7037505875000178 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf633 4.00665156404 0 83.2800001 2.219999850000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf634 4.8661051796 0 83.880833225 1.3187501625000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf635 3.32835645011 0 83.3925005 2.051249250000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf636 4.29866382416 0 84.145833375 0.9212499375000149 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf637 2.65671333449 0 83.742498925 1.5262516124999976 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf638 3.0750779935 0 84.079998875 1.020001687500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf639 5.20997422309 0 83.537500425 1.8337493625000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf640 3.17089287752 0 83.099998875 2.490001687500012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 
promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf641 4.63658695371 0 83.5449995 1.8225007500000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf642 3.3549420805 0 83.34 2.1300000000000026 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf643 3.75178902153 0 84.438333425 0.4824998625000134 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf644 3.35104871505 0 84.717499525 0.44250047500001133 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf645 2.34416917693 0 84.8849997 0.27500030000001063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf646 2.5118891277 0 82.94666695 2.719999575000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf647 4.94774553187 0 83.433333375 1.9899999375000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf648 4.42774475186 0 83.6849998 1.6125003000000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf649 3.43305296381 0 83.0583327 2.5525009500000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf650 4.6004224797 0 83.8491669 1.3662496500000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf651 3.31720456502 0 83.159166925 
2.401249612500017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf652 3.95294609782 0 84.73250125 0.4274987500000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf653 5.45071234571 0 83.452499775 1.9612503374999974 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf654 4.71064890814 0 83.414999175 2.0175012374999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf655 4.29866382416 0 83.088333125 2.5075003124999995 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf656 2.83285757459 0 84.964999975 0.19500002500000735 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf657 2.93135224398 0 83.37833325 2.072500125000012 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf658 4.27374553867 0 83.634166725 1.6887499125000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf659 3.73279362334 0 83.000833325 2.6387500125000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf660 3.36634957954 0 83.085833475 2.511249787500006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf661 2.7586587916 0 83.30083345 2.1887498250000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 
------ -+++++ -conf662 4.64388818634 0 83.463332725 1.9450009125000065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf663 3.36634957954 0 84.3250002 0.6524996999999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf664 4.6659305061 0 83.376666675 2.0749999875000142 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf665 4.23690511031 0 84.83333285 0.32666715000000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf666 2.93135224398 0 84.160833275 0.8987500875000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf667 2.95195426078 0 84.90749935 0.2525006500000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf668 4.53672840544 0 83.541666425 1.827500362500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf669 2.34416917693 0 84.857499925 0.3025000750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf670 3.48984270518 0 84.41999995 0.5100000750000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf671 3.0527280537 0 83.764167575 1.4937486375000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf672 3.25488617683 0 83.087500625 2.5087490625000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 
promise swing_level 4 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf673 4.71064890814 0 83.144999575 2.4225006375000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf674 3.26931959511 0 85.326667 0.13333300000000464 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf675 5.20997422309 0 83.419166925 2.011249612500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf676 3.43696886837 0 84.8083328 0.35166720000000284 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf677 2.68574735107 0 83.555000175 1.8074997375000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf678 4.33659088676 0 84.000833925 1.1387491125000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf679 3.46931522498 0 84.7716667 0.3883333000000079 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf680 4.45449681271 0 83.47833315 1.9225002750000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf681 5.1195298246 0 83.4533334 1.9599998999999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf682 4.11856269266 0 84.7766674 0.38333260000001135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf683 3.59623494226 0 83.344999875 2.122500187500009 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 
gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf684 2.42917048367 0 85.255833125 0.2041668750000099 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf685 3.93186488986 0 84.61999955 0.5400004500000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf686 3.84974163103 0 83.789999375 1.4550009375000172 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf687 3.97425458366 0 83.988333325 1.1575000125000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf688 3.8397168061 0 84.514167225 0.6458327750000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf689 4.09568311293 0 83.275000525 2.227499212500014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf690 3.56149045943 0 84.91583295 0.24416705000001004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf691 4.42774475186 0 83.76000005 1.4999999250000045 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf692 4.22476570574 0 83.904999775 1.282500337500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf693 4.11856269266 0 83.91000025 1.2749996250000137 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf694 5.1195298246 0 83.49333325 1.9000001249999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu 
mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf695 2.67115144897 0 83.42999995 1.9950000750000143 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf696 4.23690511031 0 84.7383333 0.4216667000000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf697 4.40131209873 0 83.79416645 1.448750324999999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf698 4.99805803481 0 83.810000025 1.4249999625000171 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf699 3.2442204033 0 83.42666685 1.9999997250000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf700 4.57189457086 0 83.818333625 1.4124995625000167 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf701 4.60761017185 0 83.0674999 2.5387501500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf702 3.07821945478 0 84.765834275 0.394165725000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf703 4.36224960626 0 84.02166645 1.1075003250000108 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf704 2.76636700953 0 84.60416665 0.5558333500000089 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf705 2.34790180657 0 85.303332675 0.15666732500001218 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 
tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf706 3.15056754597 0 84.009166575 1.1262501375000156 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf707 4.49523657385 0 84.54749905 0.6125009500000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf708 4.03958106713 0 85.1300001 0.02999990000000141 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf709 5.07547506828 0 83.512499625 1.871250562500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf710 4.63658695371 0 83.60999985 1.7250002250000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf711 3.90066133282 0 83.80166665 1.4375000250000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf712 3.40522010623 0 84.083332975 1.0150005375000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf713 4.55072975326 0 83.676665425 1.6250018625000138 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf714 2.49700123309 0 84.32166715 0.6574992750000135 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf715 3.55728584143 0 83.1108332 2.473750200000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf716 4.55072975326 0 83.798333375 1.4424999375000098 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 
4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf717 4.21269566507 0 83.65583315 1.6562502749999979 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf718 3.04334411031 0 84.176667425 0.8749988625 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf719 2.61896392029 0 83.72333265 1.5550010250000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf720 5.60613859814 0 83.339999575 2.130000637500018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf721 4.91476285223 0 83.251666825 2.2624997625000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf722 4.95606048827 0 83.671666925 1.6324996125000126 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf723 3.0060522373 0 83.262499775 2.2462503375000153 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf724 3.94237731198 0 84.794999925 0.3650000750000061 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf725 3.24779325302 0 83.64083305 1.6787504250000111 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf726 3.03083319426 0 84.04750105 1.0687484250000168 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf727 4.28616846517 0 84.5724998 0.5875002000000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 
-4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf728 2.91685555044 0 83.389167125 2.0562493125000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf729 2.88270158233 0 84.89833305 0.26166695000000006 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf730 3.25488617683 0 82.98416655 2.663750175000011 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf731 4.23690511031 0 83.139999175 2.430001237500008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf732 5.45071234571 0 83.36999975 2.085000375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf733 3.45315232558 0 82.9650004 2.692499400000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf734 2.48024552199 0 83.764166825 1.493749762500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf735 3.51488775582 0 82.97500075 2.6774988749999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf736 5.1195298246 0 83.901665725 1.2875014125000064 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf737 3.16076961237 0 83.37416625 2.078750625000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf738 3.46931522498 0 83.415000625 2.0174990625000007 -1 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf739 5.03217204616 0 83.3558337 2.10624945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf740 5.20997422309 0 83.76499975 1.4925003750000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf741 3.29121134217 0 84.9216665 0.23833350000000453 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf742 3.76626526312 0 82.96916675 2.6862498750000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf743 3.39737435359 0 84.125833175 0.9512502375000125 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf744 3.42897630477 0 83.886666875 1.309999687500003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf745 3.84974163103 0 84.352499825 0.6112502625000147 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf746 3.36634957954 0 84.6774997 0.48250030000000665 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf747 4.71064890814 0 83.726666975 1.5499995375000069 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf748 3.0527280537 0 83.5691671 1.7862493500000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf749 4.29866382416 0 83.41666675 2.014999875000001 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf750 3.8699491435 0 84.844166325 0.3158336750000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf751 4.68074200425 0 83.453333275 1.9600000874999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf752 3.84974163103 0 84.13833275 0.9325008750000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf753 4.23690511031 0 84.0374998 1.0837502999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf754 3.73279362334 0 83.38416615 2.0637507750000097 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf755 3.3549420805 0 83.305833625 2.1812495624999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf756 2.93135224398 0 83.381666725 2.067499912500004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf757 4.23690511031 0 83.1691663 2.386250550000007 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf758 5.1643560615 0 83.50833265 1.877501025000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf759 3.84974163103 0 83.7841657 1.4637514500000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf760 4.21269566507 0 83.945000075 1.222499887500014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 
1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf761 3.75178902153 0 84.19999985 0.8400002250000114 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf762 3.93186488986 0 83.3824991 2.0662513500000017 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf763 3.78064739543 0 83.970832525 1.183751212499999 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf764 3.90066133282 0 83.275000075 2.2274998875000165 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf765 4.36224960626 0 82.970831925 2.6837521125000094 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf766 4.40131209873 0 84.19666645 0.8450003250000151 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf767 3.77580687597 0 84.63999925 0.5200007500000027 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf768 3.40522010623 0 84.372499775 0.5812503375000162 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf769 3.37413466519 0 83.10833335 2.477499975000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf770 4.36224960626 0 83.624166575 1.703750137500002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf771 2.69064816356 0 83.3299999 2.1450001500000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu 
conv samp 32 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf772 3.17089287752 0 83.14083245 2.4287513250000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf773 2.76636700953 0 83.49833355 1.8924996750000105 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf774 3.90066133282 0 84.675833325 0.4841666750000059 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf775 3.46931522498 0 82.96750055 2.6887491750000123 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf776 3.44499877858 0 84.069166625 1.0362500625000166 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf777 3.72807479022 0 84.8000002 0.35999980000000564 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf778 3.84974163103 0 83.89249975 1.3012503750000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf779 3.39354810299 0 83.616666525 1.7150002125000086 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf780 4.78711530883 0 83.7558332 1.5062502000000109 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf781 3.21935410129 0 85.51583235 -0.05583234999999148 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf782 4.36224960626 0 84.7341658 
0.4258342000000056 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf783 4.09568311293 0 84.338332525 0.63250121250001 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf784 3.96890595417 0 84.714166325 0.4458336750000115 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf785 3.90066133282 0 84.400833425 0.5387498625000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf786 2.87421319527 0 85.265833675 0.19416632500000902 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp 31 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf787 4.33659088676 0 83.624999175 1.7025012375000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf788 4.78711530883 0 83.92583345 1.2512498250000093 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf789 2.39753056999 0 84.877501875 0.28249812500001215 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf790 4.55072975326 0 84.004165775 1.1337513375000015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf791 3.08465896376 0 85.410834325 0.04916567500000896 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf792 4.53672840544 0 84.04999965 1.065000525000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv samp 35 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ 
-conf793 3.72807479022 0 84.30833215 0.6775017750000103 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf794 5.07547506828 0 83.721666125 1.5575008125000096 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf795 5.20997422309 0 83.57750015 1.7737497749999989 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf796 4.00665156404 0 83.979166625 1.1712500625000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf797 3.9635717019 0 83.843332025 1.375001962500015 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf798 2.81925910919 0 83.48583365 1.9112495250000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf799 2.0622213797 0 84.307499875 0.6787501875000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp 36 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf800 4.1533654053 0 83.934999825 1.2375002624999993 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ -+++++ -conf801 4.06183643479 0 83.36000025 2.0999996250000095 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv samp 32 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv samp 33 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges.txt deleted file mode 100644 index 719b7b3624dc0ce1f1a8a2436799cff6e4c0c2c0..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges.txt +++ /dev/null @@ -1,8 +0,0 @@ --1.8816435 2.0934134 -0.5421946 0.3710851 -0.06697306 0.040868897 -0.775027394891 0.779944300652 --0.775027394891 0.779944300652 -0.42474225 0.31460348 -0.3557253 -0.17281663 -0.808667064309 0.983953297734 --0.808667064309 0.983953297734 
-0.44134507 0.79587924 -0.80424446 0.75330096 -0.995678424835 0.998566448689 --0.995678424835 0.998566448689 -0.2883836 0.31025785 -0.6353164 0.29015934 -0.993219196796 0.992379009724 --0.993219196796 0.992379009724 -0.2792431 0.37689754 -1.1379756 1.2391574 -0.999901354313 0.999910891056 --0.999901354313 0.999910891056 -0.27078503 0.27942517 -0.503003 0.12762362 -0.991036117375 0.971404970288 --0.991036117375 0.971404970288 -0.24273404 0.5845544 -0.53745 0.558251 -119.27973732 -25.2262819576 -0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt deleted file mode 100644 index 488c5521dce160487ef3f3ee149914047f6274b1..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/quant_ranges_rt.txt +++ /dev/null @@ -1,8 +0,0 @@ -1 -1.8816435 2.0934134 -0.5421946 0.3710851 -0.06697306 0.040868897 -0.775027394891 0.779944300652 -2 -0.775027394891 0.779944300652 -0.42474225 0.31460348 -0.3557253 -0.17281663 -0.808667064309 0.983953297734 -3 -0.808667064309 0.983953297734 -0.44134507 0.79587924 -0.80424446 0.75330096 -0.995678424835 0.998566448689 -4 -0.995678424835 0.998566448689 -0.2883836 0.31025785 -0.6353164 0.29015934 -0.993219196796 0.992379009724 -5 -0.993219196796 0.992379009724 -0.2792431 0.37689754 -1.1379756 1.2391574 -0.999901354313 0.999910891056 -6 -0.999901354313 0.999910891056 -0.27078503 0.27942517 -0.503003 0.12762362 -0.991036117375 0.971404970288 -7 -0.991036117375 0.971404970288 -0.24273404 0.5845544 -0.53745 0.558251 -119.27973732 -25.2262819576 -8 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/run_data/out-run-1 b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/run_data/out-run-1 deleted file mode 100644 index daddbdd93b29c04a381f5ec8101c66d3498c396e..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/run_data/out-run-1 +++ /dev/null @@ -1,45868 +0,0 @@ -size_in_bytes = 3456 -DEBUG: ***--- size_in_bytes = 3456 -DEBUG: Attempting to Allocate = 3456 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 27, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 128 -DEBUG: ***--- size_in_bytes = 128 -DEBUG: Attempting to Allocate = 128 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 36864 -DEBUG: ***--- size_in_bytes = 36864 -DEBUG: Attempting to Allocate = 36864 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 288, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 128 -DEBUG: ***--- size_in_bytes = 128 -DEBUG: Attempting to Allocate = 128 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 73728 -DEBUG: ***--- size_in_bytes = 73728 -DEBUG: Attempting to Allocate = 73728 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 288, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 256 -DEBUG: ***--- size_in_bytes = 256 -DEBUG: Attempting to Allocate = 256 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 64, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 147456 -DEBUG: ***--- size_in_bytes = 147456 -DEBUG: Attempting to Allocate = 147456 - - -DEBUG: tensor->data_format = 0 -INFO: 
nStride = 576, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 256 -DEBUG: ***--- size_in_bytes = 256 -DEBUG: Attempting to Allocate = 256 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 64, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 294912 -DEBUG: ***--- size_in_bytes = 294912 -DEBUG: Attempting to Allocate = 294912 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 576, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 512 -DEBUG: ***--- size_in_bytes = 512 -DEBUG: Attempting to Allocate = 512 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 128, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 589824 -DEBUG: ***--- size_in_bytes = 589824 -DEBUG: Attempting to Allocate = 589824 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 1152, cStride = 9, hStride = 3, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 512 -DEBUG: ***--- size_in_bytes = 512 -DEBUG: Attempting to Allocate = 512 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 128, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 81920 -DEBUG: ***--- size_in_bytes = 81920 -DEBUG: Attempting to Allocate = 81920 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 20480, cStride = 20480, hStride = 10, wStride = 1 -DEBUG: tensor->data_format = 0 -size_in_bytes = 40 -DEBUG: ***--- size_in_bytes = 40 -DEBUG: Attempting to Allocate = 40 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -INITIALIZING GPU 0 -CREATED HANDLES 0 -INFO: -WARNING: File 'opentuner_flags' not found - - -initializing tuner .... -* LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm -- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -*LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -Read PROMISE FLAGS 0 -DONE INTIALIZING GPU 0 -INFO: Reading Quantization Ranges File... -INFO: DONE. -INFO: Reading Configuration File... 
-DEBUG: first_line: 2000 -DEBUG: Baseline time: 2000.000000 - -DEBUG: line: +++++ -DEBUG: t: +++++ -DEBUG: -DEBUG: line: conf1 2.64294896823 0 84.24999995 -0.05999995000000524 -DEBUG: t: conf1 -DEBUG: t: 2.64294896823 -DEBUG: t: 0 -DEBUG: t: 84.24999995 -DEBUG: t: -0.05999995000000524 -DEBUG: -DEBUG: line: 1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -DEBUG: t: 1 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 1 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 2 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 4 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -DEBUG: t: 3 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 8 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 4 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 11 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -DEBUG: t: 5 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 15 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -DEBUG: t: 6 -DEBUG: t: gpu -DEBUG: t: conv -DEBUG: t: fp32 
-DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: tanh -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: pool_max -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 18 - -DEBUG: Found conv operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found tanh operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found pool_max operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 7 gpu mul fp32 1 add fp32 1 -DEBUG: t: 7 -DEBUG: t: gpu -DEBUG: t: mul -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: t: add -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 22 - -DEBUG: Found mul operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: Found add operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: 8 gpu softmax fp32 1 -DEBUG: t: 8 -DEBUG: t: gpu -DEBUG: t: softmax -DEBUG: t: fp32 -DEBUG: t: 1 -DEBUG: -DEBUG: Found gpu configuration -DEBUG: *** firstTensorID = 24 - -DEBUG: Found softmax operation -DEBUG: Found fp32 option -DEBUG: fp32 parameter: 1, ignoring -DEBUG: line: ----- -DEBUG: t: ----- -DEBUG: -DEBUG: DONE. -INFO: Sorting autotuner configurations... -INFO: Done sorting. -INFO: Speedup Configurations -+++++ -conf1 2.642949 0.000000 84.250000 -0.060000 -1 : gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 : gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 : gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 : gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 : gpu mul fp32 1 add fp32 1 -8 : gpu softmax fp32 1 ------ -DEBUG: slowdowns file not found. Initializing slowdowns randomly. -*LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -* LLVM_SRC_ROOT = /home/akashk4/merge/profiling/hpvm/llvm -- knobs_file_path = /home/akashk4/merge/profiling/hpvm/llvm/projects/hpvm-tensor-rt/autotuner/data/global_knobs.txt -WARNING: pause_profiler was already called -Initializing policy object ... -DONE: Initializing policy object. 
-Select target device (0 for CPU, 1 for GPU): DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -INFO: Moving 3456 bytes from host to GPU -INFO: Moving 128 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.841121 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.841637 -INFO: TimeDuration, Event = Add_end, Time = 0.000516 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.841664 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.842129 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000465 -DEBUG: No data movement required - Data on Device -INFO: Moving 36864 bytes from host to GPU -INFO: Moving 128 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.851054 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.851532 -INFO: TimeDuration, Event = Add_end, Time = 0.000478 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.851560 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.852004 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352772.852022 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352772.855581 -INFO: 
TimeDuration, Event = Pool_end, Time = 0.003559 -DEBUG: No data movement required - Data on Device -INFO: Moving 73728 bytes from host to GPU -INFO: Moving 256 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.868034 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.868295 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.868323 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.868559 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000236 -DEBUG: No data movement required - Data on Device -INFO: Moving 147456 bytes from host to GPU -INFO: Moving 256 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.875953 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.876213 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.876230 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.876467 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352772.876485 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352772.879251 -INFO: TimeDuration, Event = Pool_end, Time = 0.002767 -DEBUG: No data movement required - Data on Device -INFO: Moving 294912 bytes from host to GPU -INFO: Moving 512 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer 
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.884984 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.885151 -INFO: TimeDuration, Event = Add_end, Time = 0.000167 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.885169 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.885298 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -DEBUG: No data movement required - Data on Device -INFO: Moving 589824 bytes from host to GPU -INFO: Moving 512 bytes from host to GPU -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.889602 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.889770 -INFO: TimeDuration, Event = Add_end, Time = 0.000168 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.889787 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.889917 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000131 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352772.889933 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352772.892838 -INFO: TimeDuration, Event = Pool_end, Time = 0.002905 -DEBUG: No data movement required - Data on Device -INFO: Moving 81920 bytes from host to GPU -INFO: Moving 40 bytes from host to GPU -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352772.893256 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 
20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352772.893342 -INFO: TimeDuration, Event = Mul_end, Time = 0.000086 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.893363 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.893391 -INFO: TimeDuration, Event = Add_end, Time = 0.000027 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352772.893408 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352772.893481 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000073 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 43.610630, current iteration energy = 0.000000 - -DEBUG: **** Freeing Output Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.975309 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.975835 -INFO: TimeDuration, Event = Add_end, Time = 0.000527 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.975858 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.976366 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000508 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement
required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352772.984446 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352772.984915 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352772.984939 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352772.985381 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352772.985399 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352772.988981 -INFO: TimeDuration, Event = Pool_end, Time = 0.003582 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.000540 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.000840 -INFO: TimeDuration, Event = Add_end, Time = 0.000300 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.000864 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.001098 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.008255 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.008515 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.008539 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.008776 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000236 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.008791 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.011551 -INFO: TimeDuration, Event = Pool_end, Time = 0.002760 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.017116 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.017285 -INFO: TimeDuration, Event = Add_end, Time = 0.000169 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.017302 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.017430 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: 
Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.021492 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.021660 -INFO: TimeDuration, Event = Add_end, Time = 0.000167 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.021676 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.021804 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.021822 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.024728 -INFO: TimeDuration, Event = Pool_end, Time = 0.002906 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352773.024752 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352773.024835 -INFO: TimeDuration, Event = Mul_end, Time = 0.000083 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.024851 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.024911 -INFO: TimeDuration, Event = Add_end, Time = 0.000059 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352773.024933 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352773.025029 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000096 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
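The stride and allocation figures that repeat through these traces are consistent with a dense NCHW layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N*C*H*W*4 for fp32 data. A sketch of that arithmetic under those assumptions (struct and function names are illustrative, not the runtime's):

    // NCHW stride/size arithmetic consistent with the traces above.
    // For the first conv output (n=500, c=32, h=32, w=32) this yields
    // nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 and
    // 65536000 bytes, matching the "Attempting to Allocate" lines.
    #include <cstddef>
    #include <cstdio>

    struct Strides { size_t n, c, h, w; };

    Strides nchwStrides(size_t c, size_t h, size_t w) {
      return {c * h * w, h * w, w, 1}; // innermost (w) is contiguous
    }

    int main() {
      size_t n = 500, c = 32, h = 32, w = 32;
      Strides s = nchwStrides(c, h, w);
      std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
                  s.n, s.c, s.h, s.w);
      std::printf("size_in_bytes = %zu\n", n * c * h * w * sizeof(float));
    }

The same formula accounts for the 6144000-byte input batch (500 x 3 x 32 x 32 x 4) and for each pooling output, e.g. 16384000 bytes for 500 x 32 x 16 x 16 x 4.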
-INFO: current iteration time = 52.857739, current iteration energy = 0.000000 - -DEBUG: **** Freeing Output Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.116424 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.116929 -INFO: TimeDuration, Event = Add_end, Time = 0.000504 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.116951 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.117395 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.125124 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.125604 -INFO: TimeDuration, Event = Add_end, Time = 0.000480 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.125627 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.126073 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000446 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.126095 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.129616 -INFO: TimeDuration, Event = Pool_end, Time = 0.003520 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.141210 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.141472 -INFO: TimeDuration, Event = Add_end, Time = 0.000262 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.141494 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.141731 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.148902 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.149162 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.149178 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.149414 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.149429 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.152205 -INFO: TimeDuration, Event = Pool_end, Time = 0.002776 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.157743 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.157909 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.157926 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.158055 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.162129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.162295 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.162311 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.162441 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.162455 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.165367 -INFO: TimeDuration, Event = Pool_end, Time = 0.002912 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352773.165392 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352773.165476 -INFO: TimeDuration, Event = Mul_end, Time = 0.000084 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.165494 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.165519 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352773.165538 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352773.165611 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000073 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 52.628632, current iteration energy = 0.000000 - -DEBUG: **** Freeing Output Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.234672 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.235173 -INFO: TimeDuration, Event = Add_end, Time = 0.000501 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.235400 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.235855 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000455 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.245052 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.245534 -INFO: TimeDuration, Event = Add_end, Time = 0.000482 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.245555 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.245996 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.246013 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.249519 -INFO: TimeDuration, Event = Pool_end, Time = 0.003505 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.261089 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.261351 -INFO: TimeDuration, Event = Add_end, Time = 0.000262 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.261377 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.261615 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000239 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.268774 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.269035 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.269053 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.269287 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.269303 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.272074 -INFO: TimeDuration, Event = Pool_end, Time = 0.002771 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.277610 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.277777 -INFO: TimeDuration, Event = Add_end, Time = 0.000168 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.277794 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.277923 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.281910 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.282076 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.282093 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.282222 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.282238 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.285149 -INFO: TimeDuration, Event = Pool_end, Time = 0.002912 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352773.285175 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352773.285258 -INFO: TimeDuration, Event = Mul_end, Time = 0.000083 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.285274 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.285300 -INFO: TimeDuration, Event = Add_end, Time = 0.000026 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352773.285317 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352773.285372 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000055 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
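At the final FCLayer the logged GEMM shapes, m = 500, n = 10, k = 2048, multiply a 500 x 2048 flattened activation by a 2048 x 10 weight matrix, giving a 500 x 10 output (5000 elements, 20000 bytes of fp32) that is copied back to the host before each "Accuracy" line is printed. A sketch of how such a batch accuracy percentage is computed, assuming a plain argmax-vs-label count (hypothetical helper, not the runtime's implementation):

    // Batch accuracy as the percentage of correctly classified rows.
    // Softmax preserves the argmax, so logits or probabilities give the
    // same count; e.g. 420 correct of 500 prints "Accuracy = 84.000000".
    #include <cstdio>
    #include <vector>

    float accuracy(const std::vector<float> &scores, // m x n, row-major
                   const std::vector<int> &labels,   // m ground-truth labels
                   int m, int n) {
      int correct = 0;
      for (int i = 0; i < m; ++i) {
        int best = 0;
        for (int j = 1; j < n; ++j) // argmax over the n class scores
          if (scores[i * n + j] > scores[i * n + best]) best = j;
        if (best == labels[i]) ++correct;
      }
      return 100.0f * correct / m;
    }

    int main() {
      std::vector<float> scores(500 * 10, 0.0f);
      std::vector<int> labels(500, 0);
      std::printf("****** Accuracy = %f\n", accuracy(scores, labels, 500, 10));
    }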
-INFO: current iteration time = 53.728972, current iteration energy = 0.000000 - -DEBUG: **** Freeing Output Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.354815 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.355312 -INFO: TimeDuration, Event = Add_end, Time = 0.000498 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.355343 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.355801 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000458 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.364697 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.365167 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.365188 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.365627 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.365645 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.369167 -INFO: TimeDuration, Event = Pool_end, Time = 0.003522 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.380760 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.381025 -INFO: TimeDuration, Event = Add_end, Time = 0.000265 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.381048 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.381281 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352773.388501 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.388758 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.388776 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.389011 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352773.389027 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352773.391803 -INFO: TimeDuration, Event = Pool_end, Time = 0.002776 -DEBUG: No data movement required - Data 
on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352773.397370
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.397538
-INFO: TimeDuration, Event = Add_end, Time = 0.000168
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352773.397555
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352773.397682
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-[... next ConvLayer trace elided, identical pattern (Add at 1607352773.401699, Tanh at 1607352773.401883), ending in TensorPooling: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8; 4096000 bytes; Pool_end duration 0.002902 ...]
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352773.404959
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352773.405042
-INFO: TimeDuration, Event = Mul_end, Time = 0.000083
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352773.405059
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352773.405084
-INFO: TimeDuration, Event = Add_end, Time = 0.000025
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352773.405103
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352773.405163
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000060
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 53.310803, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
-[... next batch begins: 6144000-byte input copied host to GPU; ConvLayer 1 trace elided (65536000 bytes; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1; Add at 1607352773.478323, Tanh at 1607352773.478872) ...]
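The stride lines in this trace follow directly from the NCHW layout (tensor->data_format = 0): for an N x C x H x W tensor, nStride = C*H*W, cStride = H*W, hStride = W, and wStride = 1, so the 500 x 128 x 8 x 8 fp32 tensor above gets strides 8192/64/8/1 and 16384000 bytes. A minimal sketch of that arithmetic, with a hypothetical helper rather than the runtime's own code:

    #include <cstdio>
    #include <cstddef>

    // Element strides for a row-major NCHW tensor, matching the
    // "nStride/cStride/hStride/wStride" lines in the trace.
    struct Strides { size_t n, c, h, w; };

    static Strides nchwStrides(size_t C, size_t H, size_t W) {
      return { C * H * W, H * W, W, 1 };
    }

    int main() {
      // 500 x 128 x 8 x 8 fp32 tensor from the ConvLayer trace above.
      Strides s = nchwStrides(128, 8, 8);
      size_t bytes = 500 * s.n * sizeof(float);  // 16384000
      std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, bytes = %zu\n",
                  s.n, s.c, s.h, s.w, bytes);
      return 0;
    }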
-[... remainder of this batch elided, same per-layer event pattern: ConvLayer 2 (65536000 bytes, pool to 16 x 16), ConvLayer 3-4 (32768000 bytes, pool to 8 x 8), ConvLayer 5-6 (16384000 bytes, pool to 4 x 4), FCLayer (m = 500, n = 10, k = 2048), Softmax ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 56.370145, current iteration energy = 0.000000
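Each pool_max entry halves the spatial extent: the trace shows dim1 = dim2 = 32 inputs reduced to h = w = 16, consistent with out = (in - k) / s + 1 for a 2 x 2 window with stride 2, and the pooled 500 x 32 x 16 x 16 fp32 tensor is exactly the 16384000 bytes allocated. A small sketch of that size computation; the window and stride values are inferred from the halving, not stated in the log:

    #include <cstdio>

    // Pooled extent for window k and stride s with no padding.
    static int pooledDim(int in, int k, int s) { return (in - k) / s + 1; }

    int main() {
      int h = pooledDim(32, 2, 2);                   // 32 -> 16
      int w = pooledDim(32, 2, 2);
      long bytes = 500L * 32 * h * w * 4;            // 16384000 (fp32)
      std::printf("h = %d, w = %d, bytes = %ld\n", h, w, bytes);
      return 0;
    }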
-
-DEBUG: **** Freeing Output Tensors ***
-[... full batch trace elided: input copy, ConvLayer 1-6, FCLayer, Softmax, with the same event pattern and tensor sizes as above ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 55.789074, current iteration energy = 0.000000
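The FCLayer entries describe a single GEMM: a 500 x 2048 input against a 2048 x 10 weight matrix (m = 500, n = 10, k = 2048), producing the 500 x 10 output whose 20000 bytes are then moved to the host for the softmax and accuracy check. The runtime dispatches this to cuBLAS (the CuBlasSgemm lines); a plain reference version with the same shapes, for illustration only:

    #include <cstdio>
    #include <vector>

    // Reference GEMM with the FC layer's shapes: Y[m x n] = X[m x k] * W[k x n].
    static void gemm(const float* X, const float* W, float* Y,
                     int m, int n, int k) {
      for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (int p = 0; p < k; ++p) acc += X[i * k + p] * W[p * n + j];
          Y[i * n + j] = acc;
        }
    }

    int main() {
      const int m = 500, n = 10, k = 2048;
      std::vector<float> X(m * k, 0.01f), W(k * n, 0.02f), Y(m * n);
      gemm(X.data(), W.data(), Y.data(), m, n, k);
      std::printf("output bytes = %zu\n", Y.size() * sizeof(float));  // 20000
      return 0;
    }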
-
-DEBUG: **** Freeing Output Tensors ***
-[... full batch trace elided, identical per-layer event pattern ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 54.458160, current iteration energy = 0.000000
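Every operator is bracketed by a pair of AbsoluteTime stamps (wall-clock seconds since the epoch), and each TimeDuration line is simply their difference; for example, Add_end 1607352773.397538 minus Add 1607352773.397370 gives the 0.000168 s reported earlier. A minimal sketch of that pattern using std::chrono; the runtime's actual timer may differ:

    #include <chrono>
    #include <cstdio>

    int main() {
      using clock = std::chrono::system_clock;
      auto start = clock::now();             // "AbsoluteTime, Event = Add"
      volatile double sink = 0.0;            // stand-in for the tensor op
      for (int i = 0; i < 1000000; ++i) sink += i;
      auto end = clock::now();               // "AbsoluteTime, Event = Add_end"
      double dur = std::chrono::duration<double>(end - start).count();
      std::printf("TimeDuration, Event = Add_end, Time = %.6f\n", dur);
      return 0;
    }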
-
-DEBUG: **** Freeing Output Tensors ***
-[... full batch trace elided, identical per-layer event pattern ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 55.434243, current iteration energy = 0.000000
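The accuracy line is computed on the host from the 500 x 10 softmax output just copied back: take the argmax over num_classes for each of the batch_dim samples and report the fraction of label matches as a percentage, so 418 correct out of 500 prints as 83.599998 in single precision. A sketch of that reduction, with a hypothetical function name:

    #include <cstdio>

    // Batch accuracy over softmax outputs: argmax per row vs. label.
    static float batchAccuracy(const float* probs, const int* labels,
                               int batchDim, int numClasses) {
      int correct = 0;
      for (int i = 0; i < batchDim; ++i) {
        int best = 0;
        for (int c = 1; c < numClasses; ++c)
          if (probs[i * numClasses + c] > probs[i * numClasses + best]) best = c;
        if (best == labels[i]) ++correct;
      }
      return 100.0f * correct / batchDim;
    }

    int main() {
      float probs[4] = { 0.9f, 0.1f, 0.2f, 0.8f };  // 2 samples, 2 classes
      int labels[2] = { 0, 1 };
      std::printf("****** Accuracy = %f\n", batchAccuracy(probs, labels, 2, 2));
      return 0;
    }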
-
-DEBUG: **** Freeing Output Tensors ***
-[... full batch trace elided, identical per-layer event pattern ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 51.809966, current iteration energy = 0.000000
-INFO: current iteration time = 51.809966, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.037959 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.038435 -INFO: TimeDuration, Event = Add_end, Time = 0.000476 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.038455 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.038898 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.046881 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.047350 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.047372 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.047811 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.047828 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
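The stride values printed above are consistent with a dense, row-major NCHW layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N * nStride * 4 for fp32 data. Below is a minimal C++ sketch of that arithmetic, checked against the conv output logged above (N = 500, C = 32, H = W = 32); the struct and function names are ours for illustration, not the tensor runtime's API.

    #include <cassert>
    #include <cstddef>

    // Strides of a dense (contiguous) NCHW tensor, in elements.
    struct Strides { size_t n, c, h, w; };

    Strides nchwStrides(size_t C, size_t H, size_t W) {
      return { C * H * W, H * W, W, 1 };
    }

    int main() {
      // Conv output in the log: N = 500, C = 32, H = 32, W = 32, fp32.
      Strides s = nchwStrides(32, 32, 32);
      assert(s.n == 32768 && s.c == 1024 && s.h == 32 && s.w == 1);
      // size_in_bytes = N * nStride * sizeof(float) = 65536000, as logged.
      assert(500 * s.n * sizeof(float) == 65536000);
      return 0;
    }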
-DEBUG: **** Freeing Output Tensors ***
-INFO: input batch: size_in_bytes = 6144000; nStride = 3072, cStride = 1024, hStride = 32, wStride = 1; Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000476; TensorTanh: TimeDuration = 0.000442
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000469; TensorTanh: TimeDuration = 0.000439
-INFO: TensorPooling (n = 500, c = 32, h = 16, w = 16, dim1 = 32, dim2 = 32): output size_in_bytes = 16384000; nStride = 8192, cStride = 256, hStride = 16, wStride = 1; TimeDuration = 0.003533
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000264; TensorTanh: TimeDuration = 0.000234
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000257; TensorTanh: TimeDuration = 0.000234
-INFO: TensorPooling (n = 500, c = 64, h = 8, w = 8, dim1 = 16, dim2 = 16): output size_in_bytes = 8192000; nStride = 4096, cStride = 64, hStride = 8, wStride = 1; TimeDuration = 0.002775
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000168; TensorTanh: TimeDuration = 0.000128
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000166; TensorTanh: TimeDuration = 0.000130
-INFO: TensorPooling (n = 500, c = 128, h = 4, w = 4, dim1 = 8, dim2 = 8): output size_in_bytes = 4096000; nStride = 2048, cStride = 16, hStride = 4, wStride = 1; TimeDuration = 0.002910
-DEBUG: GPU Configuration for FCLayer
-INFO: TensorGemmGPU (CuBlasSgemm, m = 500, n = 10, k = 2048): output size_in_bytes = 20000; nStride = 10, cStride = 1, hStride = 1, wStride = 1; TimeDuration = 0.000081
-INFO: TensorAdd (x->num_elems = 5000, bias->num_elems = 10): TimeDuration = 0.000025; No activation Function
-INFO: TensorSoftmax: TimeDuration = 0.000054; Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2; updated configurationIdx to 0.
-INFO: current iteration time = 51.318447, current iteration energy = 0.000000
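Each TensorPooling record halves the spatial dimensions (dim1 x dim2 is the input size, h x w the pooled output), which is consistent with a 2x2, stride-2 max pool. A small C++ sketch of the size arithmetic follows; the window and stride are stated assumptions inferred from the 32 -> 16 -> 8 -> 4 halving, since the log prints only shapes.

    #include <cassert>
    #include <cstddef>

    // Output bytes of a max pool over an NCHW fp32 tensor (illustrative).
    // Window/stride of 2 are assumptions; the log does not print them.
    size_t pooledBytes(size_t N, size_t C, size_t dim1, size_t dim2,
                       size_t window = 2, size_t stride = 2) {
      size_t h = (dim1 - window) / stride + 1;
      size_t w = (dim2 - window) / stride + 1;
      return N * C * h * w * sizeof(float);
    }

    int main() {
      assert(pooledBytes(500, 32, 32, 32) == 16384000);  // c = 32 pool
      assert(pooledBytes(500, 64, 16, 16) ==  8192000);  // c = 64 pool
      assert(pooledBytes(500, 128, 8, 8)  ==  4096000);  // c = 128 pool
      return 0;
    }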
-DEBUG: **** Freeing Output Tensors ***
-INFO: input batch: size_in_bytes = 6144000; nStride = 3072, cStride = 1024, hStride = 32, wStride = 1; Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000477; TensorTanh: TimeDuration = 0.000447
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000472; TensorTanh: TimeDuration = 0.000445
-INFO: TensorPooling (n = 500, c = 32, h = 16, w = 16, dim1 = 32, dim2 = 32): output size_in_bytes = 16384000; nStride = 8192, cStride = 256, hStride = 16, wStride = 1; TimeDuration = 0.003525
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000266; TensorTanh: TimeDuration = 0.000234
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000258; TensorTanh: TimeDuration = 0.000238
-INFO: TensorPooling (n = 500, c = 64, h = 8, w = 8, dim1 = 16, dim2 = 16): output size_in_bytes = 8192000; nStride = 4096, cStride = 64, hStride = 8, wStride = 1; TimeDuration = 0.002769
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000168; TensorTanh: TimeDuration = 0.000131
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000166; TensorTanh: TimeDuration = 0.000131
-INFO: TensorPooling (n = 500, c = 128, h = 4, w = 4, dim1 = 8, dim2 = 8): output size_in_bytes = 4096000; nStride = 2048, cStride = 16, hStride = 4, wStride = 1; TimeDuration = 0.002908
-DEBUG: GPU Configuration for FCLayer
-INFO: TensorGemmGPU (CuBlasSgemm, m = 500, n = 10, k = 2048): output size_in_bytes = 20000; nStride = 10, cStride = 1, hStride = 1, wStride = 1; TimeDuration = 0.000082
-INFO: TensorAdd (x->num_elems = 5000, bias->num_elems = 10): TimeDuration = 0.000025; No activation Function
-INFO: TensorSoftmax: TimeDuration = 0.000054; Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2; updated configurationIdx to 0.
-INFO: current iteration time = 51.417224, current iteration energy = 0.000000
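The FC layer's GEMM dimensions (m = 500, n = 10, k = 2048) correspond to flattening the last pooled tensor (500 x 128 x 4 x 4) into a 500 x 2048 matrix and multiplying it by a 2048 x 10 weight matrix; the 20000-byte output is 500 x 10 fp32 values. The log shows the runtime dispatching this to cuBLAS SGEMM; the block below is only a naive CPU reference with the same shapes, assuming row-major storage, for checking the arithmetic.

    #include <cstddef>
    #include <vector>

    // Naive reference GEMM, C = A * B (row-major), for shape checking only.
    void gemm(const std::vector<float>& A,  // m x k
              const std::vector<float>& B,  // k x n
              std::vector<float>& C,        // m x n
              std::size_t m, std::size_t n, std::size_t k) {
      for (std::size_t i = 0; i < m; ++i)
        for (std::size_t j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (std::size_t p = 0; p < k; ++p)
            acc += A[i * k + p] * B[p * n + j];
          C[i * n + j] = acc;
        }
    }

    int main() {
      const std::size_t m = 500, n = 10, k = 2048;  // k = 128 * 4 * 4
      std::vector<float> A(m * k), B(k * n), C(m * n);
      gemm(A, B, C, m, n, k);
      return 0;  // C holds 5000 floats = 20000 bytes, as in the log.
    }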
-DEBUG: **** Freeing Output Tensors ***
-INFO: input batch: size_in_bytes = 6144000; nStride = 3072, cStride = 1024, hStride = 32, wStride = 1; Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000470; TensorTanh: TimeDuration = 0.000443
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000467; TensorTanh: TimeDuration = 0.000440
-INFO: TensorPooling (n = 500, c = 32, h = 16, w = 16, dim1 = 32, dim2 = 32): output size_in_bytes = 16384000; nStride = 8192, cStride = 256, hStride = 16, wStride = 1; TimeDuration = 0.003543
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000260; TensorTanh: TimeDuration = 0.000231
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000254; TensorTanh: TimeDuration = 0.000231
-INFO: TensorPooling (n = 500, c = 64, h = 8, w = 8, dim1 = 16, dim2 = 16): output size_in_bytes = 8192000; nStride = 4096, cStride = 64, hStride = 8, wStride = 1; TimeDuration = 0.002687
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000164; TensorTanh: TimeDuration = 0.000126
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000161; TensorTanh: TimeDuration = 0.000127
-INFO: TensorPooling (n = 500, c = 128, h = 4, w = 4, dim1 = 8, dim2 = 8): output size_in_bytes = 4096000; nStride = 2048, cStride = 16, hStride = 4, wStride = 1; TimeDuration = 0.002925
-DEBUG: GPU Configuration for FCLayer
-INFO: TensorGemmGPU (CuBlasSgemm, m = 500, n = 10, k = 2048): output size_in_bytes = 20000; nStride = 10, cStride = 1, hStride = 1, wStride = 1; TimeDuration = 0.000073
-INFO: TensorAdd (x->num_elems = 5000, bias->num_elems = 10): TimeDuration = 0.000025; No activation Function
-INFO: TensorSoftmax: TimeDuration = 0.000046; Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2; updated configurationIdx to 0.
-INFO: current iteration time = 50.075457, current iteration energy = 0.000000
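Two small bits of arithmetic tie the summary lines together: each TimeDuration is the difference between an event's start and end timestamps, and each accuracy figure is the percentage of argmax predictions matching the labels over the 500-image batch (86.800003 corresponds to 434/500, 85.400002 to 427/500). A sketch of the accuracy computation, with names of our own choosing rather than the runtime's:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Batch accuracy from row-major [batch_dim x num_classes] scores
    // (illustrative; not the runtime's own implementation).
    float batchAccuracy(const std::vector<float>& scores,
                        const std::vector<int>& labels,
                        std::size_t batch_dim, std::size_t num_classes) {
      std::size_t correct = 0;
      for (std::size_t i = 0; i < batch_dim; ++i) {
        const float* row = &scores[i * num_classes];
        // Predicted class = index of the largest score in the row.
        int pred = static_cast<int>(
            std::max_element(row, row + num_classes) - row);
        if (pred == labels[i]) ++correct;
      }
      return 100.0f * correct / batch_dim;  // e.g. 427/500 -> 85.4
    }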
-DEBUG: **** Freeing Output Tensors ***
-INFO: input batch: size_in_bytes = 6144000; nStride = 3072, cStride = 1024, hStride = 32, wStride = 1; Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000472; TensorTanh: TimeDuration = 0.000443
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000468; TensorTanh: TimeDuration = 0.000440
-INFO: TensorPooling (n = 500, c = 32, h = 16, w = 16, dim1 = 32, dim2 = 32): output size_in_bytes = 16384000; nStride = 8192, cStride = 256, hStride = 16, wStride = 1; TimeDuration = 0.003544
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000258; TensorTanh: TimeDuration = 0.000229
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000258; TensorTanh: TimeDuration = 0.000230
-INFO: TensorPooling (n = 500, c = 64, h = 8, w = 8, dim1 = 16, dim2 = 16): output size_in_bytes = 8192000; nStride = 4096, cStride = 64, hStride = 8, wStride = 1; TimeDuration = 0.002781
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000164; TensorTanh: TimeDuration = 0.000128
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000173; TensorTanh: TimeDuration = 0.000125
-INFO: TensorPooling (n = 500, c = 128, h = 4, w = 4, dim1 = 8, dim2 = 8): output size_in_bytes = 4096000; nStride = 2048, cStride = 16, hStride = 4, wStride = 1; TimeDuration = 0.002914
-DEBUG: GPU Configuration for FCLayer
-INFO: TensorGemmGPU (CuBlasSgemm, m = 500, n = 10, k = 2048): output size_in_bytes = 20000; nStride = 10, cStride = 1, hStride = 1, wStride = 1; TimeDuration = 0.000071
-INFO: TensorAdd (x->num_elems = 5000, bias->num_elems = 10): TimeDuration = 0.000022; No activation Function
-INFO: TensorSoftmax: TimeDuration = 0.000046; Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2; updated configurationIdx to 0.
-INFO: current iteration time = 49.809200, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: input batch: size_in_bytes = 6144000; nStride = 3072, cStride = 1024, hStride = 32, wStride = 1; Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000470; TensorTanh: TimeDuration = 0.000439
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 65536000; nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TensorAdd (x->num_elems = 16384000, bias->num_elems = 32): TimeDuration = 0.000494; TensorTanh: TimeDuration = 0.000436
-INFO: TensorPooling (n = 500, c = 32, h = 16, w = 16, dim1 = 32, dim2 = 32): output size_in_bytes = 16384000; nStride = 8192, cStride = 256, hStride = 16, wStride = 1; TimeDuration = 0.003513
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000259; TensorTanh: TimeDuration = 0.000230
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 32768000; nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TensorAdd (x->num_elems = 8192000, bias->num_elems = 64): TimeDuration = 0.000251; TensorTanh: TimeDuration = 0.000231
-INFO: TensorPooling (n = 500, c = 64, h = 8, w = 8, dim1 = 16, dim2 = 16): output size_in_bytes = 8192000; nStride = 4096, cStride = 64, hStride = 8, wStride = 1; TimeDuration = 0.002785
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000164; TensorTanh: TimeDuration = 0.000128
-DEBUG: GPU Configuration for ConvLayer (ApproxChoice = 2, BatchNorm = 1, CONV = 2; FP32 BASELINE): output size_in_bytes = 16384000; nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TensorAdd (x->num_elems = 4096000, bias->num_elems = 128): TimeDuration = 0.000160; TensorTanh: TimeDuration = 0.000126
-INFO: TensorPooling (n = 500, c = 128, h = 4, w = 4, dim1 = 8, dim2 = 8): output size_in_bytes = 4096000; nStride = 2048, cStride = 16, hStride = 4, wStride = 1; TimeDuration = 0.002921
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event
= Mul, Time = 1607352774.384354 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352774.384538 -INFO: TimeDuration, Event = Mul_end, Time = 0.000184 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.384553 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.384575 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352774.384589 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352774.384656 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000067 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.195098, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.405277 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.405747 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.405763 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.406204 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.413756 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.414220 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.414237 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.414672 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000435 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.414685 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.418233 -INFO: TimeDuration, Event = Pool_end, Time = 0.003547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.429663 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.429922 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.429939 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.430168 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.437119 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.437371 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.437383 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.437613 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.437625 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.440414 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.445950 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.446114 -INFO: TimeDuration, Event = Add_end, Time = 0.000165 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.446127 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.446254 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.450049 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.450210 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.450223 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.450355 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000132 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.450368 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.453284 -INFO: TimeDuration, Event = Pool_end, Time = 0.002917 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352774.453304 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352774.453376 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.453390 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.453412 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352774.453426 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352774.453473 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000047 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.199997 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
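
Each iteration above traces the network layer by layer: every ConvLayer entry is a convolution on the FP32 baseline path (ApproxChoice = 2), followed by a bias TensorAdd, a TensorTanh activation, and on alternating layers a TensorPooling step that halves the spatial dimensions. The stride and allocation numbers in the log follow directly from the NCHW layout of each fp32 tensor: wStride = 1, hStride = w, cStride = h*w, nStride = c*h*w, and size_in_bytes = n*c*h*w*sizeof(float). A minimal sketch (illustrative only, not HPVM runtime source) that reproduces the logged values:

#include <cstddef>
#include <cstdio>

struct Dims { std::size_t n, c, h, w; };

// Derive the NCHW strides and fp32 allocation size the runtime logs.
static void print_layout(Dims d) {
  std::size_t wStride = 1;
  std::size_t hStride = d.w;
  std::size_t cStride = d.h * d.w;
  std::size_t nStride = d.c * d.h * d.w;
  std::size_t bytes   = d.n * nStride * sizeof(float);  // fp32 elements
  std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, "
              "size_in_bytes = %zu\n",
              nStride, cStride, hStride, wStride, bytes);
}

int main() {
  print_layout({500, 64, 16, 16});  // nStride = 16384, cStride = 256 -> 32768000 bytes
  print_layout({500, 128, 8, 8});   // nStride = 8192,  cStride = 64  -> 16384000 bytes
  return 0;
}

For the 500-image batch this matches the log exactly, e.g. the 500 x 64 x 16 x 16 conv output yields "Attempting to Allocate = 32768000", and the 500 x 128 x 4 x 4 pool output yields nStride = 2048, cStride = 16, and 4096000 bytes.
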
-INFO: current iteration time = 49.927601, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.506349 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.506818 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.506834 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.507276 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.514799 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.515263 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.515279 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.515716 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.515730 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.519277 -INFO: TimeDuration, Event = Pool_end, Time = 0.003546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.530785 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.531044 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.531076 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.531307 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.538235 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.538486 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.538499 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.538729 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.538742 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.541530 -INFO: TimeDuration, Event = Pool_end, Time = 0.002787 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.546939 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.547102 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.547115 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.547245 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000131 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.551048 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.551209 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.551221 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.551347 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.551359 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.554282 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352774.554302 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352774.554374 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.554387 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.554409 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352774.554424 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352774.554469 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.250189, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.575080 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.575549 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.575565 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.576005 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.583568 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.584038 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.584054 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.584491 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.584618 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.588043 -INFO: TimeDuration, Event = Pool_end, Time = 0.003426 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.599476 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.599734 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.599753 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.599982 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.606911 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.607161 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.607174 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.607404 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.607416 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.610205 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.615588 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.615750 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.615763 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.615889 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.619695 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.619855 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.619867 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.619992 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.620004 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.622926 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352774.622946 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352774.623018 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.623032 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.623054 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352774.623067 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352774.623112 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
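
Every iteration ends in the same FCLayer sequence: TensorGemmGPU with m = 500 (the batch), n = 10 (the classes), and k = 2048 (the final 128 x 4 x 4 pool output, flattened), then a 10-element bias TensorAdd, then TensorSoftmax. The runtime dispatches the multiply to cuBLAS ("CuBlasSgemm *" in the log); the plain loop below is only a shape-level sketch of that computation under those assumptions, not the actual implementation:

#include <vector>

// Illustrative fully-connected forward pass: y = x * w + b,
// with m = 500, n = 10, k = 2048 as logged above.
void fc_forward(const std::vector<float>& x,  // m*k flattened pool outputs
                const std::vector<float>& w,  // k*n weights
                const std::vector<float>& b,  // n biases ("TensorAdd")
                std::vector<float>& y,        // m*n class scores
                int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = b[j];
      for (int p = 0; p < k; ++p)
        acc += x[i * k + p] * w[p * n + j];
      y[i * n + j] = acc;  // m=500, n=10: 5000 floats = 20000 bytes
    }
}

The "Moving 20000 bytes from GPU to host" line before each accuracy report is exactly this m x n fp32 output (500 x 10 x 4 bytes) being copied back for scoring.
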
-INFO: current iteration time = 49.809923, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.643998 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.644469 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.644722 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.645160 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.652509 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.652973 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.652988 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.653427 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.653440 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.656986 -INFO: TimeDuration, Event = Pool_end, Time = 0.003546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.668443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.668702 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.668720 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.668950 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.675908 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.676159 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.676172 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.676404 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.676487 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.679205 -INFO: TimeDuration, Event = Pool_end, Time = 0.002717 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.684615 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.684814 -INFO: TimeDuration, Event = Add_end, Time = 0.000199 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.684829 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.684958 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352774.688731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.688892 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.688905 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.689031 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352774.689042 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352774.691965 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352774.691985
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352774.692055
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352774.692070
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.692092
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352774.692105
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352774.692150
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000045
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.959897, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352774.713195
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352774.713708
-INFO: TimeDuration, Event = Add_end, Time = 0.000513
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352774.713725
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352774.714178
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000453
[... the remaining layers of this batch repeat the same DEBUG/INFO pattern; only the shapes and timings change:
  conv2: alloc 2 x 65536000 B, Add 0.000472 s, Tanh 0.000438 s, pool 32x32 -> 16x16 (n = 500, c = 32), alloc 16384000 B, Pool 0.003556 s
  conv3: alloc 2 x 32768000 B, bias 64, Add 0.000277 s, Tanh 0.000231 s
  conv4: alloc 2 x 32768000 B, Add 0.000252 s, Tanh 0.000231 s, pool 16x16 -> 8x8 (n = 500, c = 64), alloc 8192000 B, Pool 0.003006 s
  conv5: alloc 2 x 16384000 B, bias 128, Add 0.000163 s, Tanh 0.000126 s
  conv6: alloc 2 x 16384000 B, Add 0.000163 s, Tanh 0.000129 s, pool 8x8 -> 4x4 (n = 500, c = 128), alloc 4096000 B, Pool 0.002915 s
  FC: TensorGemmGPU m = 500, n = 10, k = 2048, CuBlasSgemm, Mul 0.000072 s, Add 0.000044 s
  Softmax: 20000 B moved GPU -> host ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
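The stride and size lines that repeat throughout this deleted log follow directly from the NCHW layout of each 4-D fp32 tensor. As a minimal sketch (hypothetical helper names, not HPVM's actual code), the logged values can be reproduced as:

#include <cstdio>

// Hypothetical helper mirroring the arithmetic behind the log's
// "nStride/cStride/hStride/wStride" and "size_in_bytes" lines for a
// 4-D NCHW fp32 tensor. Names are illustrative only.
struct Dims4 { long n, c, h, w; };

static void describeTensor(const char *tag, Dims4 d) {
  long wStride = 1;                // contiguous along W
  long hStride = d.w;              // one row of W elements
  long cStride = d.h * d.w;        // one H x W plane
  long nStride = d.c * d.h * d.w;  // one C x H x W image
  long bytes = d.n * nStride * (long)sizeof(float); // fp32 payload
  std::printf("%s: size_in_bytes = %ld\n", tag, bytes);
  std::printf("%s: nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld\n",
              tag, nStride, cStride, hStride, wStride);
}

int main() {
  describeTensor("input",  {500, 3, 32, 32});  // 6144000 B, as logged for the batch input
  describeTensor("conv1",  {500, 32, 32, 32}); // 65536000 B, strides 32768/1024/32/1
  describeTensor("logits", {500, 10, 1, 1});   // 20000 B, strides 10/1/1/1
  return 0;
}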
-INFO: current iteration time = 50.345173, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO output for the next 500-image batch; identical to the batch above except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.984216, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO output, identical modulo timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
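Each batch ends with the softmax output (500 x 10 fp32 values, the 20000 bytes the log moves from GPU to host) being scored on the host. A sketch of how such an "Accuracy" line can be computed, assuming a hypothetical computeAccuracy helper rather than the runtime's real routine:

#include <cstdio>
#include <vector>

// Hypothetical stand-in for the runtime's accuracy routine: argmax each
// row of class scores and compare against the golden labels.
static float computeAccuracy(const std::vector<float> &probs,
                             const std::vector<int> &labels,
                             int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    const float *row = probs.data() + (long)i * num_classes;
    int best = 0;
    for (int c = 1; c < num_classes; ++c)  // argmax over the 10 class scores
      if (row[c] > row[best]) best = c;
    if (best == labels[i]) ++correct;
  }
  // 100 * correct / batch_dim; the logged 86.400002 corresponds to 432/500
  return 100.0f * correct / batch_dim;
}

int main() {
  int batch_dim = 500, num_classes = 10;   // as printed in the log
  std::vector<float> probs((long)batch_dim * num_classes, 0.1f); // dummy scores
  std::vector<int> labels(batch_dim, 0);                         // dummy labels
  std::printf("****** Accuracy = %f\n",
              computeAccuracy(probs, labels, batch_dim, num_classes));
  return 0;
}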
-INFO: current iteration time = 49.651008, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO output, identical modulo timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.980030, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO output, identical modulo timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
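The AbsoluteTime/TimeDuration pairs bracketing every tensor op suggest simple wall-clock instrumentation. A minimal reproduction of the log's timing format, assuming gettimeofday-based stamps (the runtime's own profiler may differ):

#include <cstdio>
#include <sys/time.h>

// Current wall-clock time in seconds, with microsecond resolution, in the
// same fixed-point style as the log's "Time = 1607352774.713195" stamps.
static double nowSeconds() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return tv.tv_sec + tv.tv_usec * 1e-6;
}

int main() {
  double begin = nowSeconds();
  std::printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", begin);
  // ... launch the tensor operation being timed here ...
  double end = nowSeconds();
  std::printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
  std::printf("INFO: TimeDuration, Event = Add_end, Time = %f\n", end - begin);
  return 0;
}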
-INFO: current iteration time = 50.049396, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... per-layer DEBUG/INFO output for a further batch begins here, identical modulo timestamps, and continues past this excerpt ...]
-DEBUG: No data movement required - Data
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.098562 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.098724 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.098740 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.098866 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.102672 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.102833 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.102846 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.102972 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.102985 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.105904 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352775.105923 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352775.105995 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.106009 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.106030 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352775.106043 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352775.106088 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.185160, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.126891 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.127355 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.127369 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.127812 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.135372 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.135838 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.135854 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.136292 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.136312 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.139848 -INFO: TimeDuration, Event = Pool_end, Time = 0.003536 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.151284 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.151541 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.151558 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.151788 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.158740 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.158992 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.159005 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.159237 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.159249 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.162037 -INFO: TimeDuration, Event = Pool_end, Time = 0.002788 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.167408 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.167570 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.167583 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.167714 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.171596 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.171757 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.171770 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.171895 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.171907 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.174830 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352775.174850 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352775.174921 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.174935 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.174956 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352775.174970 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352775.175014 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.199997 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
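Every size_in_bytes figure in these iterations is consistent with a single forward pass over a batch of 500 CIFAR-sized (3 x 32 x 32) inputs through the pipeline the log traces: three pairs of conv + add + tanh with 32, 64, and 128 output channels, each pair followed by a 2 x 2 pool_max, then a fully connected layer (the GEMM with m = 500, n = 10, k = 2048) and a softmax. A short shape-propagation sketch, assuming same-padded convolutions and 2 x 2 pooling (trace_shapes is an illustrative name):

    import math

    def trace_shapes(batch=500):
        shapes = [(batch, 3, 32, 32)]             # input: 6,144,000 bytes
        for out_ch in (32, 32, None, 64, 64, None, 128, 128, None):
            n, c, h, w = shapes[-1]
            if out_ch is None:                    # pool_max halves H and W
                shapes.append((n, c, h // 2, w // 2))
            else:                                 # conv keeps H, W (same padding)
                shapes.append((n, out_ch, h, w))
        n, c, h, w = shapes[-1]
        shapes.append((n, c * h * w))             # flatten: k = 2048 for the GEMM
        shapes.append((n, 10))                    # FC output: m = 500, n = 10
        return shapes

    for shape in trace_shapes():
        print(shape, 4 * math.prod(shape), "bytes")  # FP32 elements

The printed byte counts reproduce the logged allocations exactly, down to the 20,000-byte (500 x 10) result that is moved back to the host before each accuracy check.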
-INFO: current iteration time = 49.998599, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.195736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.196206 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.196221 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.196662 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.204216 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.204685 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.204703 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.205144 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.205166 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.208809 -INFO: TimeDuration, Event = Pool_end, Time = 0.003643 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.220130 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.220390 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.220539 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.220767 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.227567 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.227819 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.227832 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.228065 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.228076 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.230863 -INFO: TimeDuration, Event = Pool_end, Time = 0.002786 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.238792 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.238958 -INFO: TimeDuration, Event = Add_end, Time = 0.000165 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.238971 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.239099 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.242930 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.243093 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.243107 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.243237 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.243250 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.246163 -INFO: TimeDuration, Event = Pool_end, Time = 0.002914 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352775.246183 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352775.246255 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.246268 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.246289 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352775.246304 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352775.246348 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 52.345020, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.267054 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.267522 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.267537 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.267981 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.275616 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.276084 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.276100 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.276539 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.276608 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.280088 -INFO: TimeDuration, Event = Pool_end, Time = 0.003480 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.291525 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.291783 -INFO: TimeDuration, Event = Add_end, Time = 0.000257 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.291799 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.292028 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.298954 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.299206 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.299219 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.299449 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.299461 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.302252 -INFO: TimeDuration, Event = Pool_end, Time = 0.002791 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.307662 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.307823 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.307837 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.307964 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.311769 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.311929 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.311942 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.312069 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.312081 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.315003 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352775.315022 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352775.315094 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.315108 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.315129 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352775.315143 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352775.315187 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
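The "Accuracy = ..." line printed at the end of each iteration is a plain top-1 score over the 500-sample batch: the 20,000-byte (500 x 10 FP32) softmax output is copied to the host, reduced to a predicted class per row, and compared against the labels. A sketch of that final step, assuming a NumPy-style host-side check rather than the runtime's actual C++ code:

    import numpy as np

    def batch_accuracy(probs: np.ndarray, labels: np.ndarray) -> float:
        # probs: (batch_dim, num_classes) = (500, 10); labels: (500,)
        predictions = probs.argmax(axis=1)
        return 100.0 * float(np.mean(predictions == labels))

    # 429 correct out of 500 prints as 85.800003 once stored in FP32,
    # matching the figure logged above.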
-INFO: current iteration time = 49.949986, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.335766 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.336236 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.336251 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.336694 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.344259 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.344726 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.344742 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.345184 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.345198 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.348736 -INFO: TimeDuration, Event = Pool_end, Time = 0.003538 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.360176 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.360436 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.360558 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.360787 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.367613 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.367872 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.367885 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.368115 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.368127 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.370908 -INFO: TimeDuration, Event = Pool_end, Time = 0.002781 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.376283 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.376446 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.376475 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.376604 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.380575 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.380737 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.380749 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.380876 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352775.380888 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352775.383802 -INFO: TimeDuration, Event = Pool_end, Time = 0.002914 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352775.383821 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352775.383893 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.383906 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.383927 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352775.383941 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352775.383986 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.077613, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352775.404507 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.404976 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.404994 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.405435 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
-... [the iteration continues through the remaining layers; every iteration in this log runs the same sequence with identical shapes:
-  ConvLayer 1: output 500 x 32 x 32 x 32 (65536000 bytes), nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-  ConvLayer 2: same shape, then pool_max to 500 x 32 x 16 x 16 (16384000 bytes), Pool ~ 0.0035
-  ConvLayer 3: output 500 x 64 x 16 x 16 (32768000 bytes), nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-  ConvLayer 4: same shape, then pool_max to 500 x 64 x 8 x 8 (8192000 bytes), Pool ~ 0.0028
-  ConvLayer 5: output 500 x 128 x 8 x 8 (16384000 bytes), nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-  ConvLayer 6: same shape, then pool_max to 500 x 128 x 4 x 4 (4096000 bytes), Pool ~ 0.0029
-  FCLayer: TensorGemmGPU (CuBlasSgemm) m = 500, n = 10, k = 2048, TensorAdd (bias 10), no activation, TensorSoftmax, 20000 bytes moved GPU to host] ...
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-INFO: current iteration time = 49.978399, current iteration energy = 0.000000
-... [next iteration: same layer sequence, shapes, and sub-millisecond Add/Tanh timings] ...
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-INFO: current iteration time = 50.161965, current iteration energy = 0.000000
-... [next iteration begins: ConvLayer 1 as above] ...
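The FC layer's GEMM dimensions are consistent with flattening the final 500 x 128 x 4 x 4 pool output into a 500 x 2048 matrix and multiplying by a 2048 x 10 weight matrix. A small sanity check (variable names are ours):

    # Minimal sketch of the logged GEMM shapes; names are illustrative, not HPVM's.
    batch, classes = 500, 10
    c, h, w = 128, 4, 4                   # final pool output per image, as logged
    k = c * h * w
    assert (batch, classes, k) == (500, 10, 2048)  # matches "m = 500, n = 10, k = 2048"
    assert batch * classes * 4 == 20000            # fp32 logits: the 20000-byte result tensor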
-... [the iteration continues through ConvLayers 2-6 and the FCLayer as above] ...
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-INFO: current iteration time = 49.825245, current iteration energy = 0.000000
-... [next iteration: same layer sequence and shapes] ...
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-INFO: current iteration time = 49.952807, current iteration energy = 0.000000
-... [next iteration begins: ConvLayer 1 as above] ...
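Each pool_max stage halves the spatial dimensions: the log prints the input size as dim1 x dim2 and the output as h x w. Assuming a 2x2 window with stride 2 (the window size is our inference from the logged shapes, not something the log states), the outputs follow the standard pooling formula:

    # Minimal sketch, assuming 2x2 window with stride 2 (inferred from the log).
    def pool_out(dim, window=2, stride=2):
        return (dim - window) // stride + 1

    # The three pooling stages logged above: 32->16, 16->8, 8->4.
    assert [pool_out(d) for d in (32, 16, 8)] == [16, 8, 4]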
-... [the iteration continues through ConvLayers 2-6 and the FCLayer as above] ...
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
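Six complete batch iterations are visible in this portion of the log, each over batch_dim = 500 images. Aggregating their printed accuracies (the overall figure below is computed by us; the runtime does not print it here):

    # Minimal sketch: mean of the per-batch accuracies printed above.
    # Equal-sized batches (500 images each), so a plain mean is the overall accuracy.
    batch_acc = [83.599998, 86.800003, 86.800003, 86.400002, 85.400002, 83.800003]
    print(sum(batch_acc) / len(batch_acc))   # ~85.47 over 3000 images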
-INFO: current iteration time = 50.206724, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352775.753042
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352775.753510
-INFO: TimeDuration, Event = Add_end, Time = 0.000469
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352775.753527
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352775.753967
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000439
[... trace for conv2 through conv6 elided: the same allocation/Add/Tanh pattern, with TensorPooling after conv2 (32 x 16 x 16), conv4 (64 x 8 x 8), and conv6 (128 x 4 x 4) ...]
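The logged pool dimensions trace the shape progression of this network: pooling after conv2, conv4, and conv6 halves the spatial extent each time, and the final 128 x 4 x 4 tensor flattens to the k = 2048 of the FC-layer GEMM below. A short sketch under that assumption (the 2x2, stride-2 max-pool is inferred from the logged input/output sizes, not printed explicitly):

    # Shape progression implied by the logged pool records; the 2x2,
    # stride-2 max-pool is an inference from input/output sizes.
    def pool2x2(c, h, w):
        return c, h // 2, w // 2

    shape = pool2x2(32, 32, 32)       # after conv2: (32, 16, 16)
    shape = pool2x2(64, *shape[1:])   # after conv4: (64, 8, 8)
    shape = pool2x2(128, *shape[1:])  # after conv6: (128, 4, 4)
    print(shape, shape[0] * shape[1] * shape[2])  # (128, 4, 4) 2048 == GEMM k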
-DEBUG: GPU Configuration for FCLayer
[... TensorGemmGPU (m = 500, n = 10, k = 2048), TensorAdd, and TensorSoftmax records elided: identical in structure to the FC-layer trace shown earlier ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.012407, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
[... full forward-pass trace for the next 500-image batch elided: identical in structure to the batch above, timestamps aside ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
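The trace prints only the result of the accuracy check, not how it is computed. A minimal top-1 sketch consistent with the logged "batch_dim = 500, num_classes = 10" (an assumption, not HPVM's actual kernel):

    # Minimal top-1 accuracy sketch; probs is the (500, 10) softmax output
    # copied back to the host, labels the (500,) ground-truth class ids
    # (assumed available alongside the batch).
    import numpy as np

    def batch_accuracy(probs, labels):
        return 100.0 * np.float32(np.mean(np.argmax(probs, axis=1) == labels))

With 500 images each correct prediction is worth exactly 0.2%, so the printed 83.800003 above is 419/500 rendered in fp32, and 84.000000 is 420/500.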
-INFO: current iteration time = 50.135051, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
[... full forward-pass trace for the next 500-image batch elided ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.199082, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
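Each batch ends in a single SGEMM for the fully connected layer ("CuBlasSgemm *", "m = 500, n = 10, k = 2048"). A NumPy sketch of the same shapes (the weight layout here is assumed for illustration):

    # The FC layer's shapes as a plain matmul, mirroring the logged SGEMM.
    import numpy as np

    x = np.zeros((500, 2048), dtype=np.float32)  # flattened 128 x 4 x 4 pool output
    w = np.zeros((2048, 10), dtype=np.float32)   # fc weights (layout assumed)
    y = x @ w                                    # (500, 10) class scores
    print(y.nbytes)                              # 20000, the bytes moved GPU -> host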
[... per-layer trace for this batch elided: same conv/pool/FC blocks as above ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
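The "TimeDuration, Event = ..." records make per-operator cost easy to aggregate offline. A hypothetical post-processing snippet (not an HPVM tool; the log file name is a placeholder):

    # Aggregate per-event time from the TimeDuration records of this trace.
    import re
    from collections import defaultdict

    pattern = re.compile(r"TimeDuration, Event = (\w+), Time = ([0-9.]+)")
    totals = defaultdict(float)
    with open("run_log.txt") as log:  # placeholder file name
        for line in log:
            match = pattern.search(line)
            if match:
                totals[match.group(1)] += float(match.group(2))
    for event, seconds in sorted(totals.items()):
        print(f"{event}: {seconds:.6f} s")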
-INFO: current iteration time = 51.753075, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
[... full forward-pass trace for the next 500-image batch elided ...]
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 51.353099, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.112102 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.112573 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.112591 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.113034 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.113051 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.116593 -INFO: TimeDuration, Event = Pool_end, Time = 0.003542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.128003 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.128264 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.128282 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.128515 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.135488 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.135740 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.135754 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.135984 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.135996 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.138783 -INFO: TimeDuration, Event = Pool_end, Time = 0.002787 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.144162 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.144324 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.144335 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.144461 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.148477 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.148638 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.148651 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.148776 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.148788 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.151616 -INFO: TimeDuration, Event = Pool_end, Time = 0.002828 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.151635 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.151707 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.151720 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.151783 -INFO: TimeDuration, Event = Add_end, Time = 0.000062 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.151797 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.151842 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
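The stride and allocation values in the records above follow directly from a row-major NCHW layout with 4-byte elements: for the 500x128x8x8 conv output, nStride = 128*8*8 = 8192, cStride = 8*8 = 64, hStride = 8, and size_in_bytes = 500*8192*4 = 16384000, exactly as logged. A minimal sketch of that arithmetic (a reconstruction for illustration; the runtime's own tensor code is not part of this diff):

#include <cstdio>

// Reconstructs the NCHW stride/size arithmetic seen in the log above.
// Assumes row-major NCHW and fp32 (4-byte) elements, per the FP32 BASELINE records.
struct Dims { int n, c, h, w; };

void printStridesAndSize(Dims d) {
  int wStride = 1;                 // innermost dimension varies fastest
  int hStride = d.w;               // 8    for a 128x8x8 feature map
  int cStride = d.h * d.w;         // 64
  int nStride = d.c * d.h * d.w;   // 8192
  size_t bytes = (size_t)d.n * nStride * sizeof(float);
  printf("nStride = %d, cStride = %d, hStride = %d, wStride = %d\n",
         nStride, cStride, hStride, wStride);
  printf("size_in_bytes = %zu\n", bytes);  // 16384000 for {500, 128, 8, 8}
}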
-INFO: current iteration time = 50.749790, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.172695 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.173167 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.173185 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.173630 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.181153 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.181619 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.181638 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.182072 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000434 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.182087 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.185632 -INFO: TimeDuration, Event = Pool_end, Time = 0.003545 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.197080 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.197340 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.197356 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.197587 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.204576 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.204827 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.204841 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.205069 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.205082 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.207850 -INFO: TimeDuration, Event = Pool_end, Time = 0.002768 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.213277 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.213441 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.213454 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.213580 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.217455 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.217616 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.217629 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.217755 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.217767 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.220689 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.220709 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.220780 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.220793 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.220815 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.220828 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.220872 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.070002, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.242465 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.242941 -INFO: TimeDuration, Event = Add_end, Time = 0.000476 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.242957 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.243397 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.251008 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.251470 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.251486 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.251927 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.251940 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.255486 -INFO: TimeDuration, Event = Pool_end, Time = 0.003546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.266923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.267180 -INFO: TimeDuration, Event = Add_end, Time = 0.000257 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.267198 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.267426 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.274369 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.274620 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.274633 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.274863 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.274873 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.277665 -INFO: TimeDuration, Event = Pool_end, Time = 0.002791 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.283038 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.283199 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.283211 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.283338 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.287166 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.287327 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.287339 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.287464 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.287475 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.290400 -INFO: TimeDuration, Event = Pool_end, Time = 0.002925 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.290419 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.290489 -INFO: TimeDuration, Event = Mul_end, Time = 0.000069 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.290502 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.290524 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.290539 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.290582 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
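The FCLayer records above (m = 500, n = 10, k = 2048, followed by CuBlasSgemm) describe the final dense layer: 500 pooled feature vectors of length 128*4*4 = 2048, multiplied by a 2048x10 weight matrix to produce the 500x10 output tensor whose allocation is the logged 20000 bytes (500*10*4). A plain CPU sketch of the same logical product, for illustration only, since the runtime dispatches the real computation to cuBLAS on the device:

// C[m][n] = A[m][k] * B[k][n] with m = 500 (batch), k = 2048 (128*4*4
// pooled features), n = 10 (classes), matching the logged GEMM shape.
void gemm(const float *A, const float *B, float *C, int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p)
        acc += A[i * k + p] * B[p * n + j];
      C[i * n + j] = acc;  // output: 500 * 10 * sizeof(float) = 20000 bytes
    }
}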
-INFO: current iteration time = 50.012053, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.311108 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.311578 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.311594 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.312034 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.319709 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.320172 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.320189 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.320626 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.320640 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.324189 -INFO: TimeDuration, Event = Pool_end, Time = 0.003549 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.335631 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.335889 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.335907 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.336137 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.343114 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.343366 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.343379 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.343610 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.343622 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.346398 -INFO: TimeDuration, Event = Pool_end, Time = 0.002776 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.351778 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.351940 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.351952 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.352081 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.355905 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.356066 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.356080 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.356207 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.356218 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.359143 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.359163 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.359234 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.359248 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.359271 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.359285 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.359335 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.067984, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.379843 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.380331 -INFO: TimeDuration, Event = Add_end, Time = 0.000488 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.380713 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.381154 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.388406 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.388870 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.388887 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.389326 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.389341 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.392886 -INFO: TimeDuration, Event = Pool_end, Time = 0.003545 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.404349 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.404609 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.404628 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.404860 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.411812 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.412063 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.412076 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.412326 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000249 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.412336 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.415107 -INFO: TimeDuration, Event = Pool_end, Time = 0.002771 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.420496 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.420659 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.420672 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.420799 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.424650 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.424812 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.424826 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.424951 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.424963 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.427882 -INFO: TimeDuration, Event = Pool_end, Time = 0.002918 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.427900 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.427971 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.427984 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.428006 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.428019 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.428063 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
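Note on the stride and allocation figures that recur throughout this trace: they follow directly from a row-major NCHW layout, with wStride = 1, hStride = w, cStride = h*w, nStride = c*h*w, and size_in_bytes = n*c*h*w*4 for the FP32 baseline. A minimal standalone sketch (illustration only, not HPVM runtime code; the Dims struct is made up) that reproduces the first conv output's numbers for a 500x32x32x32 tensor:

#include <cstdio>

// Hypothetical illustration of the row-major NCHW strides printed above.
struct Dims { long n, c, h, w; };

int main() {
  Dims d{500, 32, 32, 32};                // first conv layer output in the trace
  long wStride = 1;                       // innermost dimension is contiguous
  long hStride = d.w * wStride;           // 32
  long cStride = d.h * hStride;           // 32 * 32   = 1024
  long nStride = d.c * cStride;           // 32 * 1024 = 32768
  long size_in_bytes = d.n * nStride * (long)sizeof(float); // 500 * 32768 * 4 = 65536000
  std::printf("nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld\n",
              nStride, cStride, hStride, wStride);
  std::printf("size_in_bytes = %ld\n", size_in_bytes);
  return 0;
}

The same arithmetic accounts for every allocation in the trace: each 2x2 max-pool halves h and w, dividing size_in_bytes by four (65536000 -> 16384000, 32768000 -> 8192000, 16384000 -> 4096000).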
-INFO: current iteration time = 49.761158, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.448698 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.449164 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.449179 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.449614 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000435 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.457098 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.457563 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.457594 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.458033 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.458045 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.461574 -INFO: TimeDuration, Event = Pool_end, Time = 0.003528 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.473047 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.473307 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.473325 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.473556 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.480547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.480800 -INFO: TimeDuration, Event = Add_end, Time = 0.000253 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.480814 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.481042 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.481055 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.483841 -INFO: TimeDuration, Event = Pool_end, Time = 0.002787 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.489241 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.489403 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.489416 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.489543 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.493441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.493602 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.493615 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.493743 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.493755 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.496675 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.496694 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.496766 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.496780 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.496802 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.496815 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.496858 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.057363, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.517194 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.517664 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.517681 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.518121 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.525702 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.526164 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.526180 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.526651 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000470 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.526666 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.530177 -INFO: TimeDuration, Event = Pool_end, Time = 0.003511 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.541621 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.541880 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.541897 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.542128 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.549108 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.549360 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.549373 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.549603 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.549615 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.552407 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.557798 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.557962 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.557975 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.558101 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.561920 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.562081 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.562094 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.562221 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.562233 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.565155 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.565176 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.565248 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.565262 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.565284 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.565298 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.565343 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.199997 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
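Each operator above is bracketed by a pair of AbsoluteTime records (Event and Event_end), and the TimeDuration line is simply their difference; e.g. the TensorAdd in this iteration reports 1607352776.562081 - 1607352776.561920 = 0.000161 s. A hypothetical sketch of that bookkeeping (not the runtime's actual profiler code):

#include <chrono>
#include <cstdio>

// Hypothetical sketch: take a wall-clock timestamp on both sides of an
// operator and report the difference as TimeDuration.
static double absolute_time() {
  using namespace std::chrono;
  return duration<double>(system_clock::now().time_since_epoch()).count();
}

int main() {
  double start = absolute_time();
  std::printf("INFO: AbsoluteTime, Event = Add, Time = %.6f\n", start);
  // ... the tensor operation would run here ...
  double end = absolute_time();
  std::printf("INFO: AbsoluteTime, Event = Add_end, Time = %.6f\n", end);
  std::printf("INFO: TimeDuration, Event = Add_end, Time = %.6f\n", end - start);
  return 0;
}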
-INFO: current iteration time = 50.069347, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.585991 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.586464 -INFO: TimeDuration, Event = Add_end, Time = 0.000473 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.586481 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.586921 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.594497 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.594961 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.594978 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.595416 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.595430 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.598968 -INFO: TimeDuration, Event = Pool_end, Time = 0.003537 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.610414 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.610672 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.610690 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.610924 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.617876 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.618127 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.618140 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.618370 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.618382 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.621174 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.626556 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.626719 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.626734 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.626861 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.630701 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.630863 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.630876 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.631001 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.631012 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.633934 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.633954 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.634026 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.634040 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.634061 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.634075 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.634120 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.266615, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.654721 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.655195 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.655212 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.655649 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.663259 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.663725 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.663741 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.664180 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.664194 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.667734 -INFO: TimeDuration, Event = Pool_end, Time = 0.003540 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.679179 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.679439 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.679456 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.679686 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.686667 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.686919 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.686933 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.687163 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.687176 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.689961 -INFO: TimeDuration, Event = Pool_end, Time = 0.002785 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.695418 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.695593 -INFO: TimeDuration, Event = Add_end, Time = 0.000176 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.695607 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.695732 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.699616 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.699776 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.699789 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.699915 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.699927 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.702849 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.702869 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.702976 -INFO: TimeDuration, Event = Mul_end, Time = 0.000107 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.702991 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.703012 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.703027 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.703072 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
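The per-batch accuracy printed after each Softmax ("batch_dim = 500, num_classes = 10") is consistent with a top-1 check over the 20000-byte (500 x 10 FP32) score tensor that was just copied GPU-to-host. A hypothetical sketch, assuming argmax-vs-label scoring (the label source is not shown in this trace):

#include <cstdio>

// Hypothetical sketch of a top-1 accuracy check: argmax each row of the
// batch_dim x num_classes score matrix and compare against labels.
static float batch_accuracy(const float* scores, const int* labels,
                            int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    int best = 0;
    for (int c = 1; c < num_classes; ++c)
      if (scores[i * num_classes + c] > scores[i * num_classes + best]) best = c;
    if (best == labels[i]) ++correct;
  }
  return 100.0f * correct / batch_dim;
}

int main() {
  // 20000 bytes moved GPU->host above = 500 * 10 * sizeof(float)
  static float scores[500 * 10] = {0};
  static int labels[500] = {0};
  std::printf("****** Accuracy = %f\n", batch_accuracy(scores, labels, 500, 10));
  return 0;
}

Under this reading, the 85.800003 reported above corresponds to 429 of 500 images classified correctly.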
-INFO: current iteration time = 50.263285, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.723593 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.724064 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.724080 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.724521 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.732118 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.732584 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.732601 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.733038 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.733051 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.736617 -INFO: TimeDuration, Event = Pool_end, Time = 0.003566 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.748037 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.748297 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.748534 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.748761 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.755504 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.755755 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.755769 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.755998 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.756010 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.758797 -INFO: TimeDuration, Event = Pool_end, Time = 0.002788 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.764217 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.764381 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.764480 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.764604 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000124 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.768520 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.768682 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.768696 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.768821 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.768835 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.771747 -INFO: TimeDuration, Event = Pool_end, Time = 0.002913 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.771767 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.771839 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.771852 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.771875 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.771889 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.771934 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 49.987846, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.792711 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.793181 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.793197 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.793638 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.801071 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.801534 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.801553 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.801989 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000436 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.802002 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.805547 -INFO: TimeDuration, Event = Pool_end, Time = 0.003545 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.816997 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.817255 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.817273 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.817502 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.824469 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.824721 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.824735 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.824965 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.824977 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.827763 -INFO: TimeDuration, Event = Pool_end, Time = 0.002786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.833153 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.833316 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.833329 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.833457 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.837296 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.837457 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.837470 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.837597 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.837610 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.840531 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.840551 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.840623 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.840636 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.840659 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.840672 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.840716 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
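
The stride and allocation figures that recur throughout this trace are fully determined by the FP32 NCHW tensor layout; a minimal sketch of that arithmetic (helper names here are hypothetical, not the runtime's actual API):

    #include <cstddef>
    #include <cstdio>

    // Recomputes the figures the trace prints for a 500x64x16x16 FP32 tensor:
    // nStride = 64*16*16 = 16384, cStride = 16*16 = 256, hStride = 16,
    // wStride = 1, size_in_bytes = 500*16384*4 = 32768000.
    struct Dims { std::size_t n, c, h, w; };

    static void describeTensor(const Dims &d) {
      std::size_t wStride = 1;
      std::size_t hStride = d.w;
      std::size_t cStride = d.h * d.w;
      std::size_t nStride = d.c * d.h * d.w;
      std::size_t bytes = d.n * nStride * sizeof(float);
      std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
                  nStride, cStride, hStride, wStride);
      std::printf("size_in_bytes = %zu\n", bytes);
    }

    int main() {
      describeTensor({500, 64, 16, 16}); // conv output before 2x2 pooling
      describeTensor({500, 64, 8, 8});   // pooled output: 8192000 bytes
      return 0;
    }

The same formula accounts for every allocation in the trace, e.g. the 128-channel pooled tensor (500x128x4x4) gives nStride = 2048, cStride = 16, hStride = 4 and 4096000 bytes, exactly as logged.
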
-INFO: current iteration time = 50.088471, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.861619 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.862085 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.862102 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.862545 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.870130 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.870592 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.870607 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.871046 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.871060 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.874608 -INFO: TimeDuration, Event = Pool_end, Time = 0.003548 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.886054 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.886313 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.886355 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.886585 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.893538 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.893790 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.893804 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.894033 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.894046 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.896868 -INFO: TimeDuration, Event = Pool_end, Time = 0.002822 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.902243 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.902405 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.902418 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.902548 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.906400 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.906562 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.906575 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.906699 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000124 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.906711 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.909632 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352776.909651 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.909722 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.909736 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.909758 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.909772 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.909816 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.240827, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.933618 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.934099 -INFO: TimeDuration, Event = Add_end, Time = 0.000481 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.934121 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.934567 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000446 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.942528 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.942998 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.943020 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.943459 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.943479 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.947008 -INFO: TimeDuration, Event = Pool_end, Time = 0.003529 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.958592 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.958856 -INFO: TimeDuration, Event = Add_end, Time = 0.000264 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.958878 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.959114 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000235 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.966311 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.966566 -INFO: TimeDuration, Event = Add_end, Time = 0.000256 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.966585 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.966819 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.966836 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.969602 -INFO: TimeDuration, Event = Pool_end, Time = 0.002766 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.975126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.975295 -INFO: TimeDuration, Event = Add_end, Time = 0.000169 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.975313 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.975445 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000132 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.979433 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.979602 -INFO: TimeDuration, Event = Add_end, Time = 0.000169 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352776.979620 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352776.979751 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000131 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352776.979769 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352776.982671 -INFO: TimeDuration, Event = Pool_end, Time = 0.002901 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352776.982698 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352776.982781 -INFO: TimeDuration, Event = Mul_end, Time = 0.000082 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352776.982801 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352776.982828 -INFO: TimeDuration, Event = Add_end, Time = 0.000027 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352776.982847 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352776.982901 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000053 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
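
Each Event/Event_end pair above is a wall-clock timestamp in seconds, and TimeDuration is simply their difference (e.g. Add at 1607352776.979433 vs. Add_end at 1607352776.979602 gives 0.000169 s). A sketch of that bookkeeping, assuming gettimeofday-style timing; the runtime's actual profiling hooks (and any GPU synchronization they perform) may differ:

    #include <sys/time.h>
    #include <cstdio>

    // Wall-clock seconds since the epoch, matching the AbsoluteTime format.
    static double absoluteTime() {
      timeval tv;
      gettimeofday(&tv, nullptr);
      return tv.tv_sec + tv.tv_usec / 1.0e6;
    }

    int main() {
      double start = absoluteTime();
      std::printf("AbsoluteTime, Event = Add, Time = %f\n", start);
      // ... tensor op would run (and be synchronized) here ...
      double end = absoluteTime();
      std::printf("AbsoluteTime, Event = Add_end, Time = %f\n", end);
      std::printf("TimeDuration, Event = Add_end, Time = %f\n", end - start);
      return 0;
    }
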
-INFO: current iteration time = 51.351734, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.005540 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.006027 -INFO: TimeDuration, Event = Add_end, Time = 0.000486 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.006052 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.006500 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000448 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.014643 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.015110 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.015128 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.015566 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.015580 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.019121 -INFO: TimeDuration, Event = Pool_end, Time = 0.003542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.030609 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.030870 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.030889 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.031118 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.038136 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.038389 -INFO: TimeDuration, Event = Add_end, Time = 0.000253 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.038403 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.038634 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.038647 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.041431 -INFO: TimeDuration, Event = Pool_end, Time = 0.002784 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.046923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.047093 -INFO: TimeDuration, Event = Add_end, Time = 0.000170 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.047107 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.047234 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.051101 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.051262 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.051276 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.051403 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.051416 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.054333 -INFO: TimeDuration, Event = Pool_end, Time = 0.002917 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352777.054355 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.054428 -INFO: TimeDuration, Event = Mul_end, Time = 0.000073 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.054443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.054465 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.054479 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.054523 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 51.657028, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.075693 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.076164 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.076180 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.076624 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.084293 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.084805 -INFO: TimeDuration, Event = Add_end, Time = 0.000512 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.084824 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.085262 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.085276 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.088774 -INFO: TimeDuration, Event = Pool_end, Time = 0.003498 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.100252 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.100570 -INFO: TimeDuration, Event = Add_end, Time = 0.000317 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.100591 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.100820 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.107786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.108038 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.108052 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.108283 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.108296 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.111082 -INFO: TimeDuration, Event = Pool_end, Time = 0.002785 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.116494 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.116657 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.116671 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.116797 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.120653 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.120815 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.120829 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.120956 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.120968 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.123887 -INFO: TimeDuration, Event = Pool_end, Time = 0.002918 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352777.123907 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.123980 -INFO: TimeDuration, Event = Mul_end, Time = 0.000073 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.123994 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.124016 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.124030 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.124076 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
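The TensorGemmGPU entries in these traces always report m = 500, n = 10, k = 2048: the batch of 500 images is flattened after the last pooling stage (c = 128, h = 4, w = 4, so 128 * 4 * 4 = 2048 features per image) and multiplied against the 10-class weight matrix, which is why the GEMM result tensor is exactly 500 * 10 * 4 = 20000 bytes of FP32. A minimal C++ sketch of that shape arithmetic (illustrative only; the names below are assumptions, not HPVM runtime code):

#include <cstdio>

int main() {
  // Shapes taken from the log: last pooling output and the classifier width.
  const int batch = 500, c = 128, h = 4, w = 4;
  const int num_classes = 10;

  const int m = batch;        // GEMM rows: one per image
  const int k = c * h * w;    // GEMM depth: 128 * 4 * 4 = 2048 flattened features
  const int n = num_classes;  // GEMM cols: one per class

  // Output of the CuBlasSgemm call is an m x n FP32 tensor.
  const long out_bytes = (long)m * n * (long)sizeof(float);  // 500 * 10 * 4 = 20000

  std::printf("m=%d n=%d k=%d out_bytes=%ld\n", m, n, k, out_bytes);
  return 0;
}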
-INFO: current iteration time = 50.324629, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.144907 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.145379 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.145396 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.145841 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000446 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.153615 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.154098 -INFO: TimeDuration, Event = Add_end, Time = 0.000483 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.154114 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.154552 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.154568 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.158116 -INFO: TimeDuration, Event = Pool_end, Time = 0.003548 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.169597 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.169860 -INFO: TimeDuration, Event = Add_end, Time = 0.000263 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.169878 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.170109 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.177121 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.177378 -INFO: TimeDuration, Event = Add_end, Time = 0.000257 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.177392 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.177626 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.177640 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.180417 -INFO: TimeDuration, Event = Pool_end, Time = 0.002776 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.185963 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.186132 -INFO: TimeDuration, Event = Add_end, Time = 0.000169 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.186147 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.186275 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.190121 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.190283 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.190297 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.190423 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.190436 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.193358 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352777.193379 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.193458 -INFO: TimeDuration, Event = Mul_end, Time = 0.000079 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.193473 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.193497 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.193512 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.193581 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000069 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.104661, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.214620 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.215094 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.215111 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.215554 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.223205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.223671 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.223687 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.224123 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000436 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.224137 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.227681 -INFO: TimeDuration, Event = Pool_end, Time = 0.003544 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.242328 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.242599 -INFO: TimeDuration, Event = Add_end, Time = 0.000271 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.242619 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.242850 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.249097 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.249351 -INFO: TimeDuration, Event = Add_end, Time = 0.000254 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.249366 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.249595 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.249608 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.252394 -INFO: TimeDuration, Event = Pool_end, Time = 0.002786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.258035 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.258201 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.258216 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.258345 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.262212 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.262374 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.262388 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.262515 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.262527 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.265447 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352777.265468 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.265610 -INFO: TimeDuration, Event = Mul_end, Time = 0.000142 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.265625 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.265647 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.265662 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.265708 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.199997 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
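Each iteration ends the same way: the 20000-byte softmax output (batch_dim = 500 rows of num_classes = 10 probabilities) is moved back to the host and scored, producing the "****** Accuracy" line. The logged values are all multiples of 0.2, consistent with correct/500 * 100 (e.g. 416/500 prints as 83.199997 after FP32 rounding). A sketch of that scoring, under the assumption that it is a plain argmax-versus-label count (function and parameter names here are hypothetical):

#include <cstddef>

// Hypothetical helper: top-1 accuracy over a row-major [batch x classes] buffer.
float top1Accuracy(const float *probs, const int *labels,
                   int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    const float *row = probs + (size_t)i * num_classes;
    int argmax = 0;
    for (int c = 1; c < num_classes; ++c)
      if (row[c] > row[argmax]) argmax = c;   // predicted class for image i
    if (argmax == labels[i]) ++correct;
  }
  return 100.0f * correct / batch_dim;        // e.g. 419/500 -> 83.800003 in FP32
}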
-INFO: current iteration time = 52.220574, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.286710 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.287182 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.287209 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.287650 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.295356 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.295822 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.295838 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.296276 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.296288 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.299818 -INFO: TimeDuration, Event = Pool_end, Time = 0.003529 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.311288 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.311547 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.311565 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.311802 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.318795 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.319046 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.319060 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.319288 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.319320 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.322090 -INFO: TimeDuration, Event = Pool_end, Time = 0.002770 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.327506 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.327668 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.327682 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.327807 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.331636 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.331796 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.331809 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.331934 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.331946 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.334869 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352777.334889 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.334962 -INFO: TimeDuration, Event = Mul_end, Time = 0.000074 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.334976 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.334999 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.335012 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.335057 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.294206, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.356098 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.356573 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.356695 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.357134 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.364685 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.365151 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.365167 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.365606 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.365621 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.369163 -INFO: TimeDuration, Event = Pool_end, Time = 0.003542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.380630 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.380892 -INFO: TimeDuration, Event = Add_end, Time = 0.000262 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.380911 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.381141 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.388149 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.388402 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.388412 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.388642 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.388655 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.391443 -INFO: TimeDuration, Event = Pool_end, Time = 0.002788 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.396842 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.397006 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.397020 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.397146 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.400994 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.401156 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.401169 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.401295 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.401310 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.404229 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352777.404248 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.404321 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.404482 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.404507 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.404522 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.404567 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000046 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
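Every allocation in these traces logs the same stride pattern, and the numbers are exactly what a densely packed NCHW layout produces (tensor->data_format = 0 is consistent with cuDNN's CUDNN_TENSOR_NCHW): nStride = c*h*w, cStride = h*w, hStride = w, wStride = 1, and size_in_bytes = batch * nStride * 4 for FP32. A compact C++ check of the logged cases (illustrative sketch, assuming that layout; not runtime code):

#include <cstdio>

struct Strides { long n, c, h, w; };

// Element strides of a densely packed NCHW tensor.
static Strides nchw(long c, long h, long w) {
  return { c * h * w, h * w, w, 1 };
}

int main() {
  const long batch = 500;
  // conv1 output 32x32x32 -> 32768/1024/32/1, 500*32768*4 = 65536000 bytes
  // pool1 output 32x16x16 ->  8192/ 256/16/1, 500* 8192*4 = 16384000 bytes
  // pool3 output 128x4x4  ->  2048/  16/ 4/1, 500* 2048*4 =  4096000 bytes
  const long dims[3][3] = { {32, 32, 32}, {32, 16, 16}, {128, 4, 4} };
  for (const auto &d : dims) {
    Strides s = nchw(d[0], d[1], d[2]);
    std::printf("nStride=%ld cStride=%ld hStride=%ld wStride=%ld bytes=%ld\n",
                s.n, s.c, s.h, s.w, batch * s.n * 4);
  }
  return 0;
}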
-INFO: current iteration time = 50.255913, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 52.441887, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
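The "****** Accuracy" lines are the top-1 accuracy over the batch the log describes (batch_dim = 500, num_classes = 10): the argmax of each row of the softmax output is compared against the reference label. A minimal sketch under those assumptions; the function name and signature are illustrative, not the runtime's actual API:

// Sketch only: top-1 accuracy over a batch_dim x num_classes
// row-major probability matrix, as the log's accuracy lines imply.
#include <cstddef>

float top1_accuracy(const float* probs, const int* labels,
                    std::size_t batch_dim, std::size_t num_classes) {
  std::size_t correct = 0;
  for (std::size_t i = 0; i < batch_dim; ++i) {
    std::size_t best = 0;
    for (std::size_t k = 1; k < num_classes; ++k)
      if (probs[i * num_classes + k] > probs[i * num_classes + best]) best = k;
    if (static_cast<int>(best) == labels[i]) ++correct;
  }
  return 100.0f * correct / batch_dim;  // e.g. 429 of 500 correct -> 85.8
}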
-INFO: current iteration time = 50.185359, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.393541, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
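The FC layer's "m = 500, n = 10, k = 2048" lines are just shape bookkeeping: the pooled activations (500 x 128 x 4 x 4) flatten to a 500 x 2048 matrix, the weights supply the 2048 x 10 factor, and the fp32 output is 500 * 10 * 4 = 20000 bytes, exactly the size the log moves from GPU to host. A sketch of that arithmetic, not the runtime's code:

// Sketch only: GEMM shapes for the logged FC layer.
#include <cassert>
#include <cstdio>

int main() {
  const long batch = 500, c = 128, h = 4, w = 4, classes = 10;
  const long m = batch, k = c * h * w, n = classes;
  assert(k == 2048);                       // 128 * 4 * 4
  std::printf("m = %ld, n = %ld, k = %ld, out bytes = %ld\n",
              m, n, k, m * n * 4);         // 20000 bytes, as logged
  return 0;
}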
-INFO: current iteration time = 50.632287, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.415207, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG:
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.819731 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.819895 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352777.819908 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352777.820034 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352777.820047 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.822962 -INFO: TimeDuration, Event = Pool_end, Time = 0.002915 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352777.823020 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352777.823092 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352777.823107 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352777.823128 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352777.823142 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352777.823187 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
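The FC layer's GEMM shape above is just the flattened final pooling output: each of the 500 images contributes k = 128 * 4 * 4 = 2048 features, projected onto 10 classes, and the resulting 500 x 10 FP32 logits are exactly the 20000 bytes the runtime allocates and later copies back to the host. A quick back-of-the-envelope check (plain arithmetic, not the runtime's code):

#include <cstdio>

int main() {
  int n = 500, c = 128, h = 4, w = 4;   // last pool output, per the trace
  int k = c * h * w;                    // 2048 features per image
  int classes = 10;
  int out_bytes = n * classes * 4;      // FP32 logits
  printf("m = %d, n = %d, k = %d\n", n, classes, k);  // m = 500, n = 10, k = 2048
  printf("output bytes = %d\n", out_bytes);           // 20000
  return 0;
}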
-INFO: current iteration time = 50.243111, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: ***--- size_in_bytes = 65536000
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: FP32 BASELINE
-INFO: *** TensorAdd
-INFO: *** TensorTanh
-INFO: *** TensorPooling
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
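The stride and allocation numbers throughout this trace follow directly from the dense NCHW layout the runtime reports (tensor->data_format = 0): each stride is the product of the dimensions to its right, and FP32 elements take 4 bytes. A minimal sketch of that arithmetic for the pooled tensor just logged (n = 500, c = 32, h = 16, w = 16); the function name is illustrative, not the runtime's API:

#include <cstdio>
#include <cstddef>

// For a dense NCHW tensor, each stride is the product of the
// dimensions to its right; FP32 elements are 4 bytes.
static void nchwLayout(size_t n, size_t c, size_t h, size_t w) {
  size_t wStride = 1;
  size_t hStride = w;                           // 16
  size_t cStride = h * w;                       // 256
  size_t nStride = c * h * w;                   // 8192
  size_t bytes = n * nStride * sizeof(float);   // 500 * 8192 * 4 = 16384000
  printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
         nStride, cStride, hStride, wStride);
  printf("size_in_bytes = %zu\n", bytes);
}

int main() { nchwLayout(500, 32, 16, 16); return 0; }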
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.857023
-INFO: TimeDuration, Event = Pool_end, Time = 0.003544
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-INFO: current iteration time = 50.193689, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
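The pooling line kept above (n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32) reads as output dims h x w produced from a dim1 x dim2 input, consistent with a 2 x 2 window at stride 2; the window size is an assumption, since the trace does not print it:

#include <cstdio>

int main() {
  // Input feature map per the trace: dim1 x dim2 = 32 x 32,
  // pooled output: h x w = 16 x 16 (assuming a 2x2 window, stride 2).
  int in = 32, window = 2, stride = 2;
  int out = (in - window) / stride + 1;            // 16
  long bytes = 500L * 32 * out * out * 4;          // FP32, n = 500, c = 32
  printf("out = %d, size_in_bytes = %ld\n", out, bytes);  // 16, 16384000
  return 0;
}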
-INFO: current iteration time = 50.704457, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
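The 6144000-byte host-to-GPU copy that opens every iteration is the input batch itself: 500 RGB images of 32 x 32 FP32 pixels, matching the strides the runtime reports for that tensor (nStride = 3072, cStride = 1024, hStride = 32, wStride = 1). A quick sanity check:

#include <cstdio>

int main() {
  long n = 500, c = 3, h = 32, w = 32;   // one input batch, NCHW FP32
  printf("batch bytes = %ld\n", n * c * h * w * 4);   // 6144000
  printf("nStride = %ld, cStride = %ld, hStride = %ld\n",
         c * h * w, h * w, (long)w);     // 3072, 1024, 32
  return 0;
}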
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352777.995607
-INFO: TimeDuration, Event = Pool_end, Time = 0.003546
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-INFO: current iteration time = 50.462858, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
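With batch_dim = 500, one image is worth 0.2%, so every accuracy in this run is a multiple of 0.2: 85.400002 is 427 correct, 83.800003 is 419, 84.000000 is 420, 83.199997 is 416, and 85.800003 is 429; the trailing ...002/...997 digits are FP32 rounding. A sketch of the top-1 count, assuming row-major 500 x 10 logits; the helper is illustrative, not the runtime's own:

#include <cstdio>

// Count argmax hits over a batch of row-major [batch x classes] scores.
float top1Accuracy(const float *scores, const int *labels,
                   int batch, int classes) {
  int correct = 0;
  for (int i = 0; i < batch; ++i) {
    int best = 0;
    for (int j = 1; j < classes; ++j)
      if (scores[i * classes + j] > scores[i * classes + best]) best = j;
    if (best == labels[i]) ++correct;
  }
  return 100.0f * correct / batch;
}

int main() {
  printf("%f\n", 100.0f * 427 / 500);  // prints 85.400002, as in the log
  return 0;
}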
-INFO: current iteration time = 50.229369, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.121039 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.121507 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.121525 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.121968 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.129550 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.130013 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.130030 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.130468 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.130483 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
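
The nStride/cStride/hStride/wStride values reported throughout these allocations follow a dense row-major NCHW layout: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N * nStride * sizeof(float) for the FP32 baseline. A small sketch that reproduces the numbers above (hypothetical helper, not the runtime's actual code):

    #include <cstddef>
    #include <cstdio>

    struct Strides { size_t n, c, h, w; };

    // Dense row-major NCHW: the innermost W dimension varies fastest.
    static Strides nchwStrides(size_t C, size_t H, size_t W) {
      return {C * H * W, H * W, W, 1};
    }

    int main() {
      // First conv output above: N = 500, C = 32, H = 32, W = 32, fp32.
      const size_t N = 500, C = 32, H = 32, W = 32;
      const Strides s = nchwStrides(C, H, W);
      // Prints nStride = 32768, cStride = 1024, hStride = 32, wStride = 1.
      std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
                  s.n, s.c, s.h, s.w);
      // Prints size_in_bytes = 65536000, matching the allocation log.
      std::printf("size_in_bytes = %zu\n", N * s.n * sizeof(float));
    }
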
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.134019 -INFO: TimeDuration, Event = Pool_end, Time = 0.003536 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.146431 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.146697 -INFO: TimeDuration, Event = Add_end, Time = 0.000266 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.146716 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.146947 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.153893 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.154144 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.154157 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.154386 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.154398 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.157211 -INFO: TimeDuration, Event = Pool_end, Time = 0.002813 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.162589 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.162751 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.162764 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.162889 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.166710 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.166870 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.166883 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.167010 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.167021 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.169943 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352778.169963 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.170039 -INFO: TimeDuration, Event = Mul_end, Time = 0.000076 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.170053 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.170077 -INFO: TimeDuration, Event = Add_end, Time = 0.000024 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.170091 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.170142 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 51.013250, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.193684 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.194160 -INFO: TimeDuration, Event = Add_end, Time = 0.000476 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.194175 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.194618 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.202324 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.202789 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.202805 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.203240 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000435 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.203252 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.206804 -INFO: TimeDuration, Event = Pool_end, Time = 0.003551 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.218249 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.218508 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.218527 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.218757 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.225715 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.225966 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.225979 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.226212 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.226224 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.229027 -INFO: TimeDuration, Event = Pool_end, Time = 0.002803 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.234433 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.234596 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.234608 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.234733 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.238580 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.238741 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.238754 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.238881 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.238893 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.241814 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352778.241834 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.241907 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.241921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.241945 -INFO: TimeDuration, Event = Add_end, Time = 0.000023 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.241959 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.242004 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
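
The FCLayer record above is a single SGEMM: the pooled 500x128x4x4 tensor is viewed as an [m = 500, k = 2048] matrix (2048 = 128*4*4) and multiplied by a [k = 2048, n = 10] weight matrix, giving the 500x10 logits, i.e. the 20000-byte (500*10*4) tensor the log allocates. A naive CPU reference for that shape logic (cuBLAS performs the same contraction on the GPU; names here are illustrative):

    #include <vector>

    // C[m x n] = A[m x k] * B[k x n], row-major, no transposes.
    static void sgemmRef(int m, int n, int k,
                         const float *A, const float *B, float *C) {
      for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (int p = 0; p < k; ++p)
            acc += A[i * k + p] * B[p * n + j];
          C[i * n + j] = acc;
        }
    }

    int main() {
      const int m = 500, n = 10, k = 2048; // shapes from the Mul record above
      std::vector<float> A(m * k, 1.0f), B(k * n, 0.5f), C(m * n);
      sgemmRef(m, n, k, A.data(), B.data(), C.data());
      // Output: 500 * 10 * sizeof(float) = 20000 bytes, as logged.
    }
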
-INFO: current iteration time = 50.223112, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.263067 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.263536 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.263549 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.263990 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.271537 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.272001 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.272016 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.272455 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.272613 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
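
The TensorAdd records are a per-channel bias broadcast: x->num_elems is the full N*C*H*W activation count (16384000 = 500*32*32*32) while bias->num_elems is only the channel count (32), so each bias value is added at every spatial position of its channel. A sketch under those assumed semantics:

    #include <cstddef>
    #include <vector>

    // Broadcast a length-C bias over a dense NCHW activation tensor.
    static void biasAddNCHW(float *x, const float *bias,
                            size_t N, size_t C, size_t H, size_t W) {
      for (size_t n = 0; n < N; ++n)
        for (size_t c = 0; c < C; ++c) {
          const float b = bias[c];
          float *plane = x + (n * C + c) * H * W;
          for (size_t i = 0; i < H * W; ++i)
            plane[i] += b; // 500*32*32*32 = 16384000 adds for the first layer
        }
    }

    int main() {
      std::vector<float> x(500UL * 32 * 32 * 32, 0.0f), bias(32, 1.0f);
      biasAddNCHW(x.data(), bias.data(), 500, 32, 32, 32);
    }
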
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.276016 -INFO: TimeDuration, Event = Pool_end, Time = 0.003403 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.287444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.287702 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.287719 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.287949 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.294870 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.295121 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.295133 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.295362 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.295374 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.298166 -INFO: TimeDuration, Event = Pool_end, Time = 0.002793 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.303542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.303708 -INFO: TimeDuration, Event = Add_end, Time = 0.000165 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.303720 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.303846 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.307639 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.307799 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.307811 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.307937 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.307948 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.310872 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352778.310890 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.310961 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.310975 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.310997 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.311010 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.311053 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 49.722524, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.332037 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.332510 -INFO: TimeDuration, Event = Add_end, Time = 0.000473 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.332703 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.333145 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.340522 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.340989 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.341006 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.341440 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000434 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.341454 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.344997 -INFO: TimeDuration, Event = Pool_end, Time = 0.003543 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.356428 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.356688 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.356705 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.356933 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.363894 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.364146 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.364158 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.364389 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.364481 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.367194 -INFO: TimeDuration, Event = Pool_end, Time = 0.002712 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.372609 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.372772 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.372785 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.372912 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.376716 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.376877 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.376891 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.377015 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000124 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.377026 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.379950 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352778.380008 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.380080 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.380093 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.380114 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.380128 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.380172 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
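
The pooling records are consistent with a 2x2 max-pool with stride 2 and no padding: 32x32 inputs reduce to 16x16, 16x16 to 8x8, and 8x8 to 4x4 (the logged "dim1/dim2" values are the input height/width, "h/w" the output). A sketch of that output-size arithmetic, with the window and stride inferred from the logged shapes:

    #include <cassert>
    #include <cstddef>

    // Standard pooling output size: floor((in - window) / stride) + 1.
    static size_t pooledDim(size_t in, size_t window, size_t stride) {
      return (in - window) / stride + 1;
    }

    int main() {
      assert(pooledDim(32, 2, 2) == 16); // first pool_max: dim1/dim2 = 32 -> h/w = 16
      assert(pooledDim(16, 2, 2) == 8);  // second stage: 16 -> 8
      assert(pooledDim(8, 2, 2) == 4);   // third stage: 8 -> 4, giving 500x128x4x4
      return 0;
    }
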
-INFO: current iteration time = 49.763666, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.400886 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.401354 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.401370 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.401808 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.409354 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.409823 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.409839 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.410276 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.410288 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
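
The AbsoluteTime/TimeDuration pairs are plain wall-clock stamps: each *_end event prints its own timestamp plus the difference from the matching start event (e.g. 1607352778.401354 - 1607352778.400886 = 0.000468 s for the Add above). A sketch of that bookkeeping, assuming an epoch-seconds clock such as std::chrono's system_clock:

    #include <chrono>
    #include <cstdio>

    // Seconds since the epoch with sub-microsecond resolution, like the
    // log's 1607352778.xxxxxx stamps.
    static double absoluteTime() {
      using namespace std::chrono;
      return duration<double>(system_clock::now().time_since_epoch()).count();
    }

    int main() {
      const double start = absoluteTime(); // "Event = Add"
      // ... kernel launch and sync would happen here ...
      const double end = absoluteTime();   // "Event = Add_end"
      std::printf("TimeDuration = %f\n", end - start);
    }
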
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.413834 -INFO: TimeDuration, Event = Pool_end, Time = 0.003546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.425263 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.425521 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.425538 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.425770 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352778.432701 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.432952 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.432966 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.433198 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352778.433210 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.435992 -INFO: TimeDuration, Event = Pool_end, Time = 0.002782 -DEBUG: No data movement required - Data 
on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.441367
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.441530
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.441542
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.441667
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.445462
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.445622
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.445635
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.445760
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000124
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.445773
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.448698
-INFO: TimeDuration, Event = Pool_end, Time = 0.002925
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352778.448718
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.448788
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.448802
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.448823
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.448836
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.448879
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000043
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.879827, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
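The FCLayer trace above pins down the GEMM shapes: the final pool_max output is a 500 x 128 x 4 x 4 tensor, the fully connected layer flattens it to 500 x 2048, and multiplying by a 2048 x 10 weight matrix yields the logged m = 500, n = 10, k = 2048 and the 20000-byte (500 x 10 x 4) result tensor. A minimal sketch of that shape arithmetic, in plain C++ rather than the runtime's own code:

    #include <cstdio>

    int main() {
      // Last pooling output per the trace: n = 500, c = 128, h = 4, w = 4.
      const long n = 500, c = 128, h = 4, w = 4;
      const long num_classes = 10;
      // The FC layer flattens each sample, so the GEMM sees:
      const long M = n;           // 500 rows (one per batch sample)
      const long K = c * h * w;   // 2048 features per sample
      const long N = num_classes; // 10 output neurons
      // Result tensor: M x N floats -> the logged 20000-byte allocation.
      printf("m = %ld, n = %ld, k = %ld\n", M, N, K);
      printf("output size_in_bytes = %ld\n", M * N * (long)sizeof(float));
      return 0;
    }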
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
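The stride lines throughout these traces are the standard dense NCHW strides, derived from the tensor dimensions alone, and size_in_bytes is the element count times 4 bytes for FP32. For example, the 65536000-byte conv output in each batch trace is 500 x 32 x 32 x 32. A small sketch of that computation (illustrative code, not the runtime's):

    #include <cstdio>

    struct Dims { int n, c, h, w; };

    int main() {
      Dims d{500, 32, 32, 32};               // first conv output in the trace
      long wStride = 1;
      long hStride = d.w;                    // 32
      long cStride = (long)d.h * d.w;        // 1024
      long nStride = (long)d.c * d.h * d.w;  // 32768
      long bytes = (long)d.n * nStride * (long)sizeof(float); // 65536000
      printf("nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld\n",
             nStride, cStride, hStride, wStride);
      printf("size_in_bytes = %ld\n", bytes);
      return 0;
    }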
-INFO: current iteration time = 50.194578, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.989680, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
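Each TensorPooling record logs the output shape (n, c, h, w) alongside the input spatial dims (dim1, dim2); halving 32 x 32 to 16 x 16, 16 x 16 to 8 x 8, and 8 x 8 to 4 x 4 is consistent with a 2 x 2, stride-2 pool_max. A hedged sketch of that shape arithmetic, assuming window = stride = 2 (the actual pooling parameters come from the benchmark source):

    #include <cstdio>

    int main() {
      // Input feature map per the trace: 500 x 32 x 32 x 32 (NCHW).
      const long n = 500, c = 32, in_h = 32, in_w = 32;
      const long win = 2, stride = 2;  // assumed 2x2 / stride-2 pool_max
      const long out_h = (in_h - win) / stride + 1;  // 16
      const long out_w = (in_w - win) / stride + 1;  // 16
      printf("n = %ld, c = %ld, h = %ld, w = %ld , dim1 = %ld , dim2 = %ld\n",
             n, c, out_h, out_w, in_h, in_w);
      // 500 * 32 * 16 * 16 * 4 = 16384000, matching the logged allocation.
      printf("size_in_bytes = %ld\n",
             n * c * out_h * out_w * (long)sizeof(float));
      return 0;
    }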
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
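The AbsoluteTime / TimeDuration pairs simply bracket each tensor op: a wall-clock stamp before the call, one after, and their difference (e.g. Add at 1607352778.441367 and Add_end at 1607352778.441530 give the logged 0.000163 s). A sketch of that instrumentation pattern, assuming a gettimeofday-based clock; the runtime's actual profiler code may differ:

    #include <sys/time.h>
    #include <cstdio>

    // Seconds since the epoch as a double, matching the log's timestamps.
    static double absoluteTime() {
      struct timeval tv;
      gettimeofday(&tv, nullptr);
      return tv.tv_sec + tv.tv_usec * 1e-6;
    }

    int main() {
      double start = absoluteTime();
      printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", start);
      // ... run the tensor op here ...
      double end = absoluteTime();
      printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
      printf("INFO: TimeDuration, Event = Add_end, Time = %f\n", end - start);
      return 0;
    }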
-INFO: current iteration time = 50.172893, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.288775, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Output Tensors ***
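The per-batch accuracy line follows the softmax: the 500 x 10 output (the 20000 bytes moved back to the host) is compared row by row against the batch labels, so 84.000000 corresponds to 420 of 500 correct. A hedged sketch of that check; probs, labels, and argmax-vs-label scoring are assumptions, not the runtime's literal code:

    #include <cstdio>

    // Fraction of rows whose argmax matches the label, as a percentage.
    float computeAccuracy(const float *probs, const int *labels,
                          int batch_dim, int num_classes) {
      int correct = 0;
      for (int i = 0; i < batch_dim; i++) {
        int best = 0;
        for (int k = 1; k < num_classes; k++)
          if (probs[i * num_classes + k] > probs[i * num_classes + best])
            best = k;
        if (best == labels[i])
          correct++;
      }
      return 100.0f * correct / batch_dim;  // e.g. 420/500 -> 84.000000
    }

    int main() {
      // Tiny toy batch: 2 samples, 3 classes, both predicted correctly.
      const float probs[] = {0.1f, 0.7f, 0.2f, 0.8f, 0.1f, 0.1f};
      const int labels[] = {1, 0};
      printf("****** Accuracy = %f\n", computeAccuracy(probs, labels, 2, 3));
      return 0;
    }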
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.561406, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.815440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.815912
-INFO: TimeDuration, Event = Add_end, Time = 0.000471
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.815926
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.816367
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.823920
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.824386
-INFO: TimeDuration, Event = Add_end, Time = 0.000466
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.824508
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.824941
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000433
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.824954
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.828737
-INFO: TimeDuration, Event = Pool_end, Time = 0.003783
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.839838
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.840102
-INFO: TimeDuration, Event = Add_end, Time = 0.000265
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.840121
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.840552
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000432
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.847263
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.847515
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.847528
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.847758
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.847770
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.850562
-INFO: TimeDuration, Event = Pool_end, Time = 0.002792
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.855937
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.856100
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.856112
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.856236
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000124
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.860036
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.860195
-INFO: TimeDuration, Event = Add_end, Time = 0.000160
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.860208
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.860333
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.860343
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.863273
-INFO: TimeDuration, Event = Pool_end, Time = 0.002930
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352778.863292
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.863362
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.863375
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.863400
-INFO: TimeDuration, Event = Add_end, Time = 0.000024
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.863415
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.863463
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000048
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.463555, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.883800
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.884271
-INFO: TimeDuration, Event = Add_end, Time = 0.000471
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.884285
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.884726
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.892251
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.892716
-INFO: TimeDuration, Event = Add_end, Time = 0.000465
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.892733
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.893172
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000438
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.893185
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.896728
-INFO: TimeDuration, Event = Pool_end, Time = 0.003543
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.908162
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.908422
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.908437
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.908667
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.915588
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.915839
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.915852
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.916082
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.916094
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.918886
-INFO: TimeDuration, Event = Pool_end, Time = 0.002792
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.924254
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.924417
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.924477
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.924602
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.928688
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.928851
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.928864
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.928989
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.929002
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.931914
-INFO: TimeDuration, Event = Pool_end, Time = 0.002912
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352778.931933
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352778.932005
-INFO: TimeDuration, Event = Mul_end, Time = 0.000072
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.932019
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.932040
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352778.932053
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352778.932097
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000044
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.132401, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.953035
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.953506
-INFO: TimeDuration, Event = Add_end, Time = 0.000471
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.953522
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.953962
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.961483
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.961948
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.961962
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.962397
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000436
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.962410
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.965964
-INFO: TimeDuration, Event = Pool_end, Time = 0.003553
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.977406
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.977666
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.977682
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.977915
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000233
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.984872
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.985123
-INFO: TimeDuration, Event = Add_end, Time = 0.000252
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.985136
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.985366
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.985378
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352778.988170
-INFO: TimeDuration, Event = Pool_end, Time = 0.002792
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.993553
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.993716
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.993727
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.993853
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352778.997656
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352778.997818
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352778.997831
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352778.997956
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352778.997968
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.000894
-INFO: TimeDuration, Event = Pool_end, Time = 0.002926
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352779.000913
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.000983
-INFO: TimeDuration, Event = Mul_end, Time = 0.000071
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.000997
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.001018
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.001031
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.001074
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000043
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.934594, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.021846
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.022316
-INFO: TimeDuration, Event = Add_end, Time = 0.000470
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.022332
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.022776
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000444
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.030267
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.030734
-INFO: TimeDuration, Event = Add_end, Time = 0.000466
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.030749
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.031188
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000439
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.031202
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.034746
-INFO: TimeDuration, Event = Pool_end, Time = 0.003544
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.046180
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.046438
-INFO: TimeDuration, Event = Add_end, Time = 0.000258
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.046454
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.046685
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.053618
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.053868
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.053881
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.054111
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000229
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.054122
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.056914
-INFO: TimeDuration, Event = Pool_end, Time = 0.002792
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.062285
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.062448
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.062462
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.062587
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.066411
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.066572
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.066585
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.066710
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.066722
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.069643
-INFO: TimeDuration, Event = Pool_end, Time = 0.002921
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352779.069662
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.069733
-INFO: TimeDuration, Event = Mul_end, Time = 0.000071
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.069746
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.069767
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.069781
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.069826
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000045
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.022579, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.090641
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.091113
-INFO: TimeDuration, Event = Add_end, Time = 0.000472
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.091128
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.091564
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000436
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.099143
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.099610
-INFO: TimeDuration, Event = Add_end, Time = 0.000467
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.099625
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.100063
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000438
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.100076
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.103621
-INFO: TimeDuration, Event = Pool_end, Time = 0.003545
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.115050
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.115309
-INFO: TimeDuration, Event = Add_end, Time = 0.000259
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.115326
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.115557
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.122540
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.122791
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.122803
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.123035
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.123048
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.125836
-INFO: TimeDuration, Event = Pool_end, Time = 0.002788
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.131245
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.131407
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.131420
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.131546
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution
- ApproxChoice = 2
- BatchNorm = 1
- CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352779.135334
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.135493
-INFO: TimeDuration, Event = Add_end, Time = 0.000159
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.135505
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.135632
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352779.135643
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.138568
-INFO: TimeDuration, Event = Pool_end, Time = 0.002925
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event
= Mul, Time = 1607352779.138587 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.138657 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.138670 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.138692 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.138705 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.138749 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.048527, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.159617 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.160088 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.160102 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.160543 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.168084 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.168546 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.168563 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.169000 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.169014 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.172560 -INFO: TimeDuration, Event = Pool_end, Time = 0.003547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.183988 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.184245 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.184261 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.184493 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.191422 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.191674 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.191686 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.191916 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.191928 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.194717 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.200088 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.200250 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.200262 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.200389 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.204180 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.204340 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.204458 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.204581 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000122 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.204592 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.207414 -INFO: TimeDuration, Event = Pool_end, Time = 0.002822 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352779.207434 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.207505 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.207519 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.207540 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.207554 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.207598 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 49.840224, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.228043 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.228544 -INFO: TimeDuration, Event = Add_end, Time = 0.000501 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.228742 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.229182 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.236528 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.236990 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.237006 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.237441 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000434 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.237455 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.241004 -INFO: TimeDuration, Event = Pool_end, Time = 0.003549 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.253349 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.253608 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.253625 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.253854 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.260819 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.261071 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.261083 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.261313 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.261325 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.264100 -INFO: TimeDuration, Event = Pool_end, Time = 0.002775 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.269479 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.269640 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.269654 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.269778 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.273570 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.273731 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.273742 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.273869 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.273880 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.276803 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352779.276823 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.276893 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.276907 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.276929 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.276942 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.276986 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.662651, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.297216 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.297686 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.297702 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.298145 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.305693 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.306159 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.306173 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.306610 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.306622 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.310157 -INFO: TimeDuration, Event = Pool_end, Time = 0.003535 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.321591 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.321850 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.321867 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.322100 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.329033 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.329284 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.329297 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.329530 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.329542 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.332331 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.337715 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.337880 -INFO: TimeDuration, Event = Add_end, Time = 0.000165 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.337893 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.338020 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.341801 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.341961 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.341974 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.342099 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.342110 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.345033 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352779.345053 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.345122 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.345140 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.345161 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.345175 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.345219 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.199997 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 49.897658, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.366205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.366679 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.366694 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.367136 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.374637 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.375100 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.375116 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.375554 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.375567 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.379119 -INFO: TimeDuration, Event = Pool_end, Time = 0.003552 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.390542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.390802 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.390820 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.391049 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.398001 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.398252 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.398264 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.398496 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.398507 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.401296 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.406664 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.406827 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.406839 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.406965 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.410773 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.410933 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.410947 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.411075 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.411087 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.414006 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352779.414025 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.414096 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.414111 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.414133 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.414146 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.414190 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.214562, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.434805 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.435275 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.435289 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.435729 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.443281 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.443746 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.443764 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.444201 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.444213 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.447763 -INFO: TimeDuration, Event = Pool_end, Time = 0.003549 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.459187 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.459446 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.459463 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.459693 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.466623 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.466874 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.466887 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.467116 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.467127 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.469918 -INFO: TimeDuration, Event = Pool_end, Time = 0.002791 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.475294 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.475456 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.475469 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.475595 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.479391 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.479551 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.479563 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.479689 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.479738 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.482627 -INFO: TimeDuration, Event = Pool_end, Time = 0.002889 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352779.482646 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.482717 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.482730 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.482751 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.482764 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.482809 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
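The trace above prints NCHW strides and an allocation size for every intermediate tensor, and these follow directly from the tensor shape: for an N x C x H x W fp32 tensor, nStride = C*H*W, cStride = H*W, hStride = W, wStride = 1, and size_in_bytes = N*C*H*W*4. A minimal sketch that reproduces the logged numbers for the first conv output (illustrative only; the helper name `nchw_layout` is made up, not the runtime's API):

```python
# Sketch (assumption, not the runtime's code): recompute the NCHW strides
# and fp32 allocation sizes that the log prints for each tensor.
def nchw_layout(n, c, h, w, elem_bytes=4):
    strides = {
        "nStride": c * h * w,  # elements skipped per batch image
        "cStride": h * w,      # per channel
        "hStride": w,          # per row
        "wStride": 1,          # per element
    }
    return strides, n * c * h * w * elem_bytes

# First conv output in the trace: 500 x 32 x 32 x 32
strides, size = nchw_layout(500, 32, 32, 32)
assert strides == {"nStride": 32768, "cStride": 1024,
                   "hStride": 32, "wStride": 1}
assert size == 65536000  # matches "Attempting to Allocate = 65536000"
```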
-INFO: current iteration time = 49.889720, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.503886 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.504353 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.504365 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.504810 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000445 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.512388 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.512853 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.512870 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.513309 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.513321 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.516869 -INFO: TimeDuration, Event = Pool_end, Time = 0.003547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.528299 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.528564 -INFO: TimeDuration, Event = Add_end, Time = 0.000265 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.528581 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.528811 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.535740 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.535990 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.536003 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.536233 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.536245 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.539038 -INFO: TimeDuration, Event = Pool_end, Time = 0.002793 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.544411 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.544573 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.544586 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.544710 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.548870 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.549032 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.549045 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.549171 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.549184 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.552100 -INFO: TimeDuration, Event = Pool_end, Time = 0.002917 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352779.552120 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.552193 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.552206 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.552227 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.552241 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.552285 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.282557, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.572737 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.573206 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.573221 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.573664 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000443 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
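Taken together, the per-layer allocation sizes pin down the shape progression for the 500-image batch: three conv blocks that raise the channel count (3 to 32, to 64, to 128) with 2x2 pooling halving the spatial dims from 32x32 down to 4x4. A sketch under those assumptions (kernel sizes and padding are not shown in the log, so the conv shapes are inferred from the byte counts) that recomputes every allocation in the trace:

```python
# Hypothetical reconstruction of the tensor shapes implied by the logged
# fp32 allocation sizes; shapes are inferred, not read from the runtime.
def trace_shapes(batch=500, elem_bytes=4):
    shapes = [
        (batch,   3, 32, 32),  # input        ->  6,144,000 bytes
        (batch,  32, 32, 32),  # conv1/conv2  -> 65,536,000 bytes each
        (batch,  32, 16, 16),  # pool1        -> 16,384,000 bytes
        (batch,  64, 16, 16),  # conv3/conv4  -> 32,768,000 bytes each
        (batch,  64,  8,  8),  # pool2        ->  8,192,000 bytes
        (batch, 128,  8,  8),  # conv5/conv6  -> 16,384,000 bytes each
        (batch, 128,  4,  4),  # pool3        ->  4,096,000 bytes
    ]
    return [(s, s[0] * s[1] * s[2] * s[3] * elem_bytes) for s in shapes]

for shape, nbytes in trace_shapes():
    print(shape, nbytes)
# The final 128 * 4 * 4 = 2048 features per image become k = 2048
# in the FC-layer GEMM that follows each pool3 in the log.
```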
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.581191 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.581659 -INFO: TimeDuration, Event = Add_end, Time = 0.000467 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.581673 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.582110 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.582123 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.585667 -INFO: TimeDuration, Event = Pool_end, Time = 0.003544 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.597096 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.597356 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.597372 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.597605 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.604542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.604800 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.604813 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.605043 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.605055 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.607834 -INFO: TimeDuration, Event = Pool_end, Time = 0.002780 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.613207 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.613370 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.613383 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.613513 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.617306 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.617467 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.617480 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.617605 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.617617 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.620542 -INFO: TimeDuration, Event = Pool_end, Time = 0.002925 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352779.620562 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.620633 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.620646 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.620667 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.620681 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.620724 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
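The FC layer runs as a cuBLAS SGEMM whose dimensions the log states explicitly: m = 500 (batch), n = 10 (classes), k = 2048 (the flattened 128 x 4 x 4 pool output). That fixes the 20000-byte result tensor and the 10-element bias broadcast that follow it. Worked out (illustrative arithmetic only, mirroring the logged values):

```python
# FC-layer GEMM bookkeeping as logged: "m = 500, n = 10, k = 2048".
m, n, k = 500, 10, 2048
out_elems = m * n            # 5000   -> "x->num_elems = 5000"
out_bytes = out_elems * 4    # 20000  -> "Attempting to Allocate = 20000",
assert out_bytes == 20000    #           later "Moving 20000 bytes from GPU to host"
# TensorAdd then broadcasts the 10-element bias ("bias->num_elems = 10")
# across the 500 rows before the softmax.
```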
-INFO: current iteration time = 49.875646, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.642612 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.643093 -INFO: TimeDuration, Event = Add_end, Time = 0.000481 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.643113 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.643565 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000452 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.651745 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.652208 -INFO: TimeDuration, Event = Add_end, Time = 0.000463 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.652225 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.652665 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.652683 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.656224 -INFO: TimeDuration, Event = Pool_end, Time = 0.003541 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.667622 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.667880 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.667896 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.668124 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.675093 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.675343 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.675355 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.675584 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.675593 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.678383 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.683800 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.683961 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.683973 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.684098 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.687913 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.688074 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.688086 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.688219 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000132 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.688230 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.691146 -INFO: TimeDuration, Event = Pool_end, Time = 0.002917 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352779.691165 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.691233 -INFO: TimeDuration, Event = Mul_end, Time = 0.000069 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.691246 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.691267 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.691280 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.691323 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 51.388619, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.712720 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.713195 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.713210 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.713648 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.721076 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.721545 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.721560 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.721997 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.722010 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.725552 -INFO: TimeDuration, Event = Pool_end, Time = 0.003542 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.737010 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.737267 -INFO: TimeDuration, Event = Add_end, Time = 0.000257 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.737285 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.737514 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.744477 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.744727 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.744741 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.744972 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.744983 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.747769 -INFO: TimeDuration, Event = Pool_end, Time = 0.002786 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.753187 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.753348 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.753362 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.753489 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.757282 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.757443 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352779.757456 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352779.757582 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352779.757594 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352779.760516 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352779.760535 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352779.760606 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352779.760619 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352779.760640 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352779.760653 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352779.760698 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000045 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
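Each iteration ends by copying the 500 x 10 softmax output to the host and printing a batch accuracy (84.0, 85.8, 83.6, 86.8, 86.8, 86.4 across the iterations above). The log does not show the accuracy routine itself; a hedged NumPy sketch of the standard top-1 computation it implies, with `batch_accuracy` a made-up name:

```python
import numpy as np

# Assumed (not the runtime's implementation): top-1 accuracy over one
# 500-image batch with 10 classes, as in "batch_dim = 500, num_classes = 10".
def batch_accuracy(probs: np.ndarray, labels: np.ndarray) -> float:
    assert probs.shape == (500, 10)
    predictions = probs.argmax(axis=1)        # top-1 class per image
    return 100.0 * np.mean(predictions == labels)

# e.g. 432 correct out of 500 is 86.4%, which the runtime's fp32
# arithmetic prints as "****** Accuracy = 86.400002".
```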
-INFO: current iteration time = 49.896637, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
[... repeated per-layer Conv/Add/Tanh/Pool and FC/GEMM/Softmax trace, identical to the first iteration above except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.161368, current iteration energy = 0.000000
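Each "TensorGemmGPU" block in the trace multiplies the flattened 500 x 2048 pooling output by a 2048 x 10 weight matrix ("m = 500, n = 10, k = 2048") and allocates a 500 * 10 * 4 = 20000-byte result, which the runtime dispatches to cuBLAS ("CuBlasSgemm *"). The sketch below is only a row-major reference loop for the same shape, with illustrative names, not the runtime's cuBLAS path:

    #include <cstdio>
    #include <vector>

    // Reference GEMM for the FC layer shape logged above: C = A * B,
    // A is batch x features (500 x 2048), B is features x classes
    // (2048 x 10), all row-major.
    void gemm(const float* A, const float* B, float* C,
              int m, int n, int k) {
      for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (int p = 0; p < k; ++p)
            acc += A[i * k + p] * B[p * n + j];
          C[i * n + j] = acc;
        }
    }

    int main() {
      int m = 500, n = 10, k = 2048;
      std::vector<float> A(m * k), B(k * n), C(m * n);
      gemm(A.data(), B.data(), C.data(), m, n, k);
      // 500 * 10 * sizeof(float) = 20000 bytes, matching the
      // "size_in_bytes = 20000" allocation in the log.
      std::printf("output bytes = %zu\n", C.size() * sizeof(float));
    }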
-
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... repeated per-layer trace, identical except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.241734, current iteration energy = 0.000000
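The "****** Accuracy" lines are per-batch top-1 accuracies over batch_dim = 500 inputs with num_classes = 10 scores each; 83.800003 corresponds to 419 of 500 correct predictions. A hedged sketch of that computation (the actual HPVM helper may differ):

    // Top-1 accuracy over the softmax output copied back to the host:
    // argmax of each row of (batch_dim x num_classes) scores versus
    // the reference label.
    float computeAccuracy(const float* probs, const int* labels,
                          int batch_dim, int num_classes) {
      int correct = 0;
      for (int i = 0; i < batch_dim; ++i) {
        int best = 0;
        for (int c = 1; c < num_classes; ++c)
          if (probs[i * num_classes + c] > probs[i * num_classes + best])
            best = c;
        if (best == labels[i]) ++correct;
      }
      return 100.0f * correct / batch_dim;  // e.g. 419/500 -> 83.8
    }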
-
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... repeated per-layer trace, identical except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.836951, current iteration energy = 0.000000
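Every operator in the trace is bracketed by an "AbsoluteTime" event at entry and an "AbsoluteTime"/"TimeDuration" pair at exit; the roughly 50-second "current iteration time" aggregates these intervals plus host-side work. A minimal sketch of this style of wall-clock instrumentation, assuming gettimeofday-based timestamps (which match the seconds.microseconds format in the log):

    #include <cstdio>
    #include <sys/time.h>

    // Wall-clock timestamp in seconds, e.g. 1607352779.967159.
    static double absoluteTime() {
      struct timeval tv;
      gettimeofday(&tv, nullptr);
      return tv.tv_sec + tv.tv_usec * 1e-6;
    }

    int main() {
      double start = absoluteTime();
      std::printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", start);
      // ... launch the tensor operator here ...
      double end = absoluteTime();
      std::printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
      std::printf("INFO: TimeDuration, Event = Add_end, Time = %f\n",
                  end - start);
    }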
-
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... repeated per-layer trace, identical except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.098943, current iteration energy = 0.000000
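The pooling lines record the input spatial dimensions ("dim1 = 8, dim2 = 8") alongside the output shape ("h = 4, w = 4"), consistent with a 2 x 2 window, and each "size_in_bytes" allocation is n * c * h * w * sizeof(float). A small sketch of that arithmetic (the 2 x 2 window is inferred from the logged shapes, not stated in the log):

    #include <cstdio>

    int main() {
      long n = 500, c = 128, dim1 = 8, dim2 = 8;
      long h = dim1 / 2, w = dim2 / 2;  // assumed 2x2 max-pool window
      long bytes = n * c * h * w * (long)sizeof(float);
      // Prints 4096000, matching "size_in_bytes = 4096000" above.
      std::printf("size_in_bytes = %ld\n", bytes);
    }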
-
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... repeated per-layer trace, identical except for timestamps ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.987597, current iteration energy = 0.000000
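The "findNextConfiguration"/"findTargetConfiguration" lines show the approximation controller re-selecting a knob configuration after each batch against a goal value (here goalVal = -0.060000, search kind 2); with a single fp32 baseline configuration the index stays at 0. A speculative sketch of what such a search could look like over a (speedup, accuracy-loss) table; the names and selection rule here are illustrative, not HPVM's actual logic:

    #include <cstdio>
    #include <vector>

    struct Configuration { double speedup; double accuracyLoss; };

    // Pick the fastest configuration whose accuracy loss stays within
    // the budget; assumes a non-empty table with entry 0 as baseline.
    int findTargetConfiguration(const std::vector<Configuration>& confs,
                                double maxLoss) {
      int best = 0;
      for (int i = 1; i < (int)confs.size(); ++i)
        if (confs[i].accuracyLoss <= maxLoss &&
            confs[i].speedup > confs[best].speedup)
          best = i;
      std::printf("DEBUG: findTargetConfiguration: "
                  "Updated configurationIdx to %d.\n", best);
      return best;
    }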
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.134265 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.134729 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.134748 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.135181 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000434 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.135198 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.138749 -INFO: TimeDuration, Event = Pool_end, Time = 0.003551 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.150174 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.150433 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.150450 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.150681 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.157611 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.157861 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.157874 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.158103 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.158115 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.160906 -INFO: TimeDuration, Event = Pool_end, Time = 0.002790 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.166320 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.166483 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.166496 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.166621 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.170448 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.170608 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.170620 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.170746 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.170758 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.173828 -INFO: TimeDuration, Event = Pool_end, Time = 0.003070 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352780.176801 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.177169 -INFO: TimeDuration, Event = Mul_end, Time = 0.000367 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.177183 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.177252 -INFO: TimeDuration, Event = Add_end, Time = 0.000069 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.177268 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.177355 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000087 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
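Every tensor op in this log is bracketed by an AbsoluteTime stamp at entry and an AbsoluteTime/TimeDuration pair at exit; the duration is simply the difference of the two stamps (modulo last-digit rounding). A minimal C++ sketch of that instrumentation pattern, assuming gettimeofday-style wall-clock stamps — the function names here are illustrative, not the runtime's actual profiler API:

#include <sys/time.h>
#include <cstdio>

// Illustrative event timer: seconds since the Unix epoch with microsecond
// resolution, matching the 1607352780.xxxxxx stamps in the log.
static double absolute_time() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return tv.tv_sec + tv.tv_usec * 1e-6;
}

int main() {
  double start = absolute_time();
  printf("INFO: AbsoluteTime, Event = Add, Time = %f\n", start);
  // ... run the tensor op here ...
  double end = absolute_time();
  printf("INFO: AbsoluteTime, Event = Add_end, Time = %f\n", end);
  printf("INFO: TimeDuration, Event = Add_end, Time = %f\n", end - start);
  return 0;
}

(For GPU kernels a synchronization point would be needed before the end stamp; the log's host-side pattern is all this sketch reproduces.)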
-INFO: current iteration time = 51.194505, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.198256 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.198744 -INFO: TimeDuration, Event = Add_end, Time = 0.000488 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.198761 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.199206 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.206860 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.207322 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.207337 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.207773 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000436 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.207789 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
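The allocation lines are fully determined by the tensor dims: for a dense NCHW fp32 tensor, wStride = 1, hStride = w, cStride = h*w, nStride = c*h*w, and size_in_bytes = n*c*h*w*4. The 500x32x16x16 pool output this line ends on gives exactly the logged nStride = 8192, cStride = 256, hStride = 16, and size_in_bytes = 16384000. A self-contained sketch of that arithmetic (the struct and names are illustrative):

#include <cstddef>
#include <cstdio>

// NCHW strides and allocation size for a dense fp32 tensor.
struct Dims4 { size_t n, c, h, w; };

int main() {
  Dims4 d{500, 32, 16, 16};            // pooled conv output from the log
  size_t wStride = 1;
  size_t hStride = d.w;                // 16
  size_t cStride = d.h * d.w;          // 256
  size_t nStride = d.c * d.h * d.w;    // 8192
  size_t bytes   = d.n * nStride * sizeof(float);
  printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu\n",
         nStride, cStride, hStride, wStride);
  printf("size_in_bytes = %zu\n", bytes);  // 16384000, as in the log
  return 0;
}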
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.211363 -INFO: TimeDuration, Event = Pool_end, Time = 0.003573 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.222776 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.223035 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.223053 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.223282 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.230212 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.230463 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.230476 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.230706 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.230717 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.233508 -INFO: TimeDuration, Event = Pool_end, Time = 0.002791 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.238894 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.239056 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.239070 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.239195 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.243009 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.243169 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.243181 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.243306 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.243318 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.246241 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352780.246261 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.246332 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.246345 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.246367 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.246380 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.246423 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000043 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.376974, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.271741 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.272276 -INFO: TimeDuration, Event = Add_end, Time = 0.000535 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.272682 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.273144 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000461 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.282762 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.283252 -INFO: TimeDuration, Event = Add_end, Time = 0.000490 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.283287 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.283744 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000457 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.283775 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.287206 -INFO: TimeDuration, Event = Pool_end, Time = 0.003431 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.298792 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.299052 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.299074 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.299311 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.306498 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.306758 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.306776 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.307009 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.307024 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.309793 -INFO: TimeDuration, Event = Pool_end, Time = 0.002769 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.315299 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.315466 -INFO: TimeDuration, Event = Add_end, Time = 0.000168 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.315483 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.315613 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000131 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.319588 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.319755 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.319771 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.319901 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.319915 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.322824 -INFO: TimeDuration, Event = Pool_end, Time = 0.002908 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352780.322850 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.322930 -INFO: TimeDuration, Event = Mul_end, Time = 0.000080 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.322947 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.322972 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.322989 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.323040 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
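Each iteration ends by copying the 500x10 softmax output (20000 bytes) back to the host and printing "****** Accuracy = ...". A plausible reference for that reduction — top-1 argmax per row compared against integer labels — is sketched below; the actual checker lives in the benchmark harness, so the name and signature are assumptions:

#include <vector>

// Top-1 accuracy over a batch: argmax of each row of the
// (batch_dim x num_classes) softmax output vs. the ground-truth label.
float compute_accuracy(const std::vector<float>& probs,
                       const std::vector<int>& labels,
                       int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    int best = 0;
    for (int j = 1; j < num_classes; ++j)
      if (probs[i * num_classes + j] > probs[i * num_classes + best]) best = j;
    if (best == labels[i]) ++correct;
  }
  // e.g. 434 correct of 500 -> 86.8, printed as 86.800003 in float
  return 100.0f * correct / batch_dim;
}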
-INFO: current iteration time = 56.623812, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.345490 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.345964 -INFO: TimeDuration, Event = Add_end, Time = 0.000475 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.345981 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.346422 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.354132 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.354598 -INFO: TimeDuration, Event = Add_end, Time = 0.000465 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.354614 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.355050 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000436 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.355065 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
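The pool lines report the output shape alongside the input spatial extents (dim1, dim2): every pool_max in this log maps 32x32 -> 16x16, 16x16 -> 8x8, and 8x8 -> 4x4, consistent with a 2x2 window at stride 2. The window/stride values are inferred from the logged sizes, not printed directly; the shape arithmetic, as a sketch:

#include <cstdio>

// Output spatial extent of a pooling op: floor((in - window) / stride) + 1.
// window = 2, stride = 2 reproduces every pool shape in this log.
int pooled_extent(int in, int window, int stride) {
  return (in - window) / stride + 1;
}

int main() {
  int dim1 = 32, dim2 = 32;            // input h, w from the log
  int h = pooled_extent(dim1, 2, 2);   // 16
  int w = pooled_extent(dim2, 2, 2);   // 16
  printf("n = 500, c = 32, h = %d, w = %d , dim1 = %d , dim2 = %d\n",
         h, w, dim1, dim2);
  return 0;
}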
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.358628 -INFO: TimeDuration, Event = Pool_end, Time = 0.003564 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.370137 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.370400 -INFO: TimeDuration, Event = Add_end, Time = 0.000263 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.370419 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.370650 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.377649 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.377904 -INFO: TimeDuration, Event = Add_end, Time = 0.000255 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.377918 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.378150 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.378164 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.380955 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.386392 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.386557 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.386571 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.386698 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.390566 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.390728 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.390741 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.390867 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.390881 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.393800 -INFO: TimeDuration, Event = Pool_end, Time = 0.002919 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352780.393820 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.393894 -INFO: TimeDuration, Event = Mul_end, Time = 0.000074 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.393909 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.393931 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.393951 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.393998 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000047 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.523118, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.418587 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.419108 -INFO: TimeDuration, Event = Add_end, Time = 0.000521 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.419128 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.419593 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000465 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.427843 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.428318 -INFO: TimeDuration, Event = Add_end, Time = 0.000475 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.428344 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.428793 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000449 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.428816 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.432328 -INFO: TimeDuration, Event = Pool_end, Time = 0.003511 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.443914 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.444179 -INFO: TimeDuration, Event = Add_end, Time = 0.000265 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.444200 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.444437 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000237 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.451614 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.451868 -INFO: TimeDuration, Event = Add_end, Time = 0.000254 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.451884 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.452118 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000234 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.452133 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.454917 -INFO: TimeDuration, Event = Pool_end, Time = 0.002784 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.460507 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.460682 -INFO: TimeDuration, Event = Add_end, Time = 0.000175 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.460701 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.460833 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000132 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.464780 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.464946 -INFO: TimeDuration, Event = Add_end, Time = 0.000166 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.464962 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.465092 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000130 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.465107 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.468018 -INFO: TimeDuration, Event = Pool_end, Time = 0.002912 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352780.468042 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.468122 -INFO: TimeDuration, Event = Mul_end, Time = 0.000080 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.468139 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.468165 -INFO: TimeDuration, Event = Add_end, Time = 0.000025 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.468182 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.468234 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.400002 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
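The FC layer logs m = 500, n = 10, k = 2048 before the CuBlasSgemm call: the 500x128x4x4 pooled activations are viewed as a 500x2048 matrix (128*4*4 = 2048) and multiplied by a 2048x10 weight matrix, yielding the 500x10 logits tensor (20000 bytes) that feeds the final TensorAdd and TensorSoftmax. A plain reference loop for what that GEMM computes — the layout/transpose handling a real cuBLAS call needs is deliberately omitted:

// Reference GEMM for the FC layer: C[m x n] = A[m x k] * B[k x n].
// m = 500 (batch), k = 2048 (flattened features), n = 10 (classes).
void gemm_ref(const float* A, const float* B, float* C,
              int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p)
        acc += A[i * k + p] * B[p * n + j];
      C[i * n + j] = acc;
    }
}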
-INFO: current iteration time = 53.965671, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... full forward-pass trace for this batch (six ConvLayer stages, FCLayer, Softmax), structurally identical to the trace above, elided ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 51.674588, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... forward-pass trace elided ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
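Every pooling step in this trace halves the spatial dimensions (dim1 = dim2 = 8 in, h = w = 4 out), consistent with 2x2 max pooling at stride 2 and no padding. The sketch below, under that assumption, reproduces the logged output shape and the 4096000-byte allocation:

#include <cassert>
#include <cstddef>

// Shape arithmetic behind the TensorPooling lines above, assuming a
// 2x2 window with stride 2 and no padding (matches every pool here).
struct Shape { size_t n, c, h, w; };

Shape poolOut(Shape in, size_t win = 2, size_t stride = 2) {
  return {in.n, in.c, (in.h - win) / stride + 1, (in.w - win) / stride + 1};
}

int main() {
  // dim1 = dim2 = 8 on the way in; the runtime reports h = w = 4 out.
  Shape out = poolOut({500, 128, 8, 8});
  assert(out.h == 4 && out.w == 4);
  // size_in_bytes = 4096000, as allocated for the pooled tensor.
  assert(out.n * out.c * out.h * out.w * sizeof(float) == 4096000);
}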
-INFO: current iteration time = 54.480835, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... forward-pass trace elided ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.925242, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... forward-pass trace elided ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
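The FCLayer is lowered to a single GEMM (the CuBlasSgemm line) with m = 500 (batch), n = 10 (classes), and k = 2048, i.e. the flattened 128 x 4 x 4 pooled activations. A plain-loop stand-in for that call, using only the shapes taken from the log:

#include <cassert>
#include <cstddef>
#include <vector>

// Dimension sketch for the FC-as-GEMM step: plain loops stand in for
// the cuBLAS call; only the m/n/k shapes come from the trace above.
int main() {
  const size_t m = 500, n = 10, k = 2048;  // batch x classes, k = 128*4*4
  std::vector<float> x(m * k, 0.01f), w(k * n, 0.01f), y(m * n, 0.0f);
  for (size_t i = 0; i < m; ++i)
    for (size_t j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (size_t p = 0; p < k; ++p) acc += x[i * k + p] * w[p * n + j];
      y[i * n + j] = acc;
    }
  // Output tensor: 500 x 10 floats = 20000 bytes, matching the allocation
  // and the "Moving 20000 bytes from GPU to host" line.
  assert(y.size() * sizeof(float) == 20000);
}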
-INFO: current iteration time = 49.853317, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... forward-pass trace elided ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.030344, current iteration energy = 0.000000
-DEBUG: **** Freeing Ouput Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
[... forward-pass trace for the final captured batch begins here; the log is truncated mid-iteration ...]
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.872678 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.872927 -INFO: TimeDuration, Event = Add_end, Time = 0.000249 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.872941 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.873169 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000228 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.873181 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.875971 -INFO: TimeDuration, Event = Pool_end, Time = 0.002790 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.881346 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.881509 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.881521 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.881649 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.885427 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.885587 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.885599 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.885724 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.885736 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.888659 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352780.888678 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.888749 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.888762 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.888783 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.888797 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.888846 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
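[Editor's note] The TensorGemmGPU entries above show the final FC layer lowered to a single SGEMM with m = 500 (the batch), n = 10 (the classes), and k = 2048 (the flattened 128x4x4 pooled features). As a point of reference only, here is a minimal sketch of that dimension mapping, using a plain CPU loop instead of the runtime's actual CuBlasSgemm call; all names are illustrative, not the runtime's API:

#include <cstdio>
#include <vector>

// Sketch: FC layer as GEMM, out[m x n] = in[m x k] * w[k x n].
// m = batch size, k = flattened features, n = classes.
static void fcAsGemm(const std::vector<float> &in, const std::vector<float> &w,
                     std::vector<float> &out, int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (int p = 0; p < k; ++p)
        acc += in[i * k + p] * w[p * n + j];
      out[i * n + j] = acc;
    }
}

int main() {
  const int m = 500, n = 10, k = 2048; // values from the log above
  std::vector<float> in(m * k, 0.01f), w(k * n, 0.02f), out(m * n);
  fcAsGemm(in, w, out, m, n, k);
  std::printf("out[0][0] = %f\n", out[0]); // 2048 * 0.01 * 0.02 = 0.4096
  return 0;
}

The 20000-byte result tensor in the log is exactly m * n * sizeof(float) = 500 * 10 * 4.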
-INFO: current iteration time = 49.859689, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.909338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.909806 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.909823 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.910261 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.917993 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.918464 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.918479 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.918918 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.918931 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.922469 -INFO: TimeDuration, Event = Pool_end, Time = 0.003538 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.933900 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.934157 -INFO: TimeDuration, Event = Add_end, Time = 0.000258 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.934174 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.934405 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.941355 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.941607 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.941626 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.941856 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.941868 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.944646 -INFO: TimeDuration, Event = Pool_end, Time = 0.002779 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.950083 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.950245 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.950258 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.950382 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.954194 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.954354 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.954366 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.954492 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.954504 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.957428 -INFO: TimeDuration, Event = Pool_end, Time = 0.002924 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352780.957447 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352780.957516 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.957530 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.957551 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352780.957565 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352780.957609 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.140633, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.978336 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.978807 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.978822 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.979264 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000442 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352780.986800 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352780.987262 -INFO: TimeDuration, Event = Add_end, Time = 0.000462 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352780.987277 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352780.987724 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000447 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352780.987738 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352780.991282 -INFO: TimeDuration, Event = Pool_end, Time = 0.003544 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.002711 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.002969 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.002985 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.003215 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.010159 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.010409 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.010422 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.010653 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.010664 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.013453 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.018836 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.018996 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.019009 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.019135 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.024928 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.025097 -INFO: TimeDuration, Event = Add_end, Time = 0.000168 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.025111 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.025239 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.025250 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.028162 -INFO: TimeDuration, Event = Pool_end, Time = 0.002912 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352781.028182 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.028270 -INFO: TimeDuration, Event = Mul_end, Time = 0.000088 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.028283 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.028326 -INFO: TimeDuration, Event = Add_end, Time = 0.000042 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.028480 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.028531 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 52.268981, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.052246 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.052719 -INFO: TimeDuration, Event = Add_end, Time = 0.000474 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.052737 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.053181 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000444 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.060759 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.061225 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.061242 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.061679 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.061692 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.065235 -INFO: TimeDuration, Event = Pool_end, Time = 0.003543 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.076670 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.076930 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.076948 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.077177 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.084147 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.084399 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.084483 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.084711 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000227 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.084723 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.087442 -INFO: TimeDuration, Event = Pool_end, Time = 0.002719 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.092828 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.092991 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.093004 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.093129 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.096928 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.097089 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.097102 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.097229 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000128 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.097241 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.100162 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352781.100180 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.100252 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.100266 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.100286 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.100300 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.100356 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000056 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 49.926406, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.121205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.121675 -INFO: TimeDuration, Event = Add_end, Time = 0.000470 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.121691 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.122130 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000463
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000434
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003573
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000250
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000229
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002791
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000160
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002924
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: m = 500, n = 10, k = 2048
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000069
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000050
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
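Throughout this log the runtime prints tensor strides and allocation sizes that follow mechanically from a dense NCHW layout (tensor->data_format = 0 is consistent with NCHW): wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and an fp32 tensor occupies N*C*H*W*4 bytes. A minimal C++ sketch (illustrative only, not HPVM's tensor_runtime code; Dims and printStrides are made-up names) that reproduces the numbers above:

#include <cstdio>
#include <cstddef>

// Illustrative only: strides and fp32 allocation size for a dense
// NCHW tensor, matching the values printed in the log.
struct Dims { std::size_t n, c, h, w; };

static void printStrides(Dims d) {
    std::size_t wStride = 1;
    std::size_t hStride = d.w;              // one row of elements
    std::size_t cStride = d.h * d.w;        // one feature map
    std::size_t nStride = d.c * d.h * d.w;  // one image
    std::size_t bytes   = d.n * nStride * sizeof(float);
    std::printf("nStride = %zu, cStride = %zu, hStride = %zu, wStride = %zu, "
                "size_in_bytes = %zu\n", nStride, cStride, hStride, wStride, bytes);
}

int main() {
    printStrides({500, 32, 32, 32});  // -> 32768, 1024, 32, 1; 65536000 bytes
    printStrides({500, 128, 4, 4});   // -> 2048, 16, 4, 1; 4096000 bytes
    return 0;
}

For the 500 x 32 x 32 x 32 conv output this gives exactly the logged nStride = 32768 and size_in_bytes = 65536000.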
-INFO: current iteration time = 49.924441, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000466
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000441
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000466
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000438
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003536
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000258
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000249
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002792
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002923
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: m = 500, n = 10, k = 2048
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: TimeDuration, Event = Add_end, Time = 0.000022
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000147
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.451014, current iteration energy = 0.000000
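In each pool_max trace the dim1/dim2 values are the input spatial extent and h/w the pooled output (32 x 32 in, 16 x 16 out, and so on down to 4 x 4). The halving is consistent with a 2 x 2 window at stride 2, but the actual window and stride are set in the benchmark source, so that part is an assumption in the sketch below:

#include <cstdio>

int main() {
    // Assumed 2x2 window at stride 2, inferred from the halved h/w in the log.
    int n = 500, c = 32, in = 32;          // dim1 = dim2 = 32
    int window = 2, stride = 2;
    int out = (in - window) / stride + 1;  // -> 16, the logged h = w
    long bytes = 1L * n * c * out * out * 4;  // fp32 -> 16384000, as allocated
    std::printf("h = w = %d, size_in_bytes = %ld\n", out, bytes);
    return 0;
}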
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000468
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000434
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003550
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000259
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002766
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000164
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000160
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002931
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: m = 500, n = 10, k = 2048
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000072
-INFO: TimeDuration, Event = Add_end, Time = 0.000022
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000050
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
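The FCLayer block is a single SGEMM (the CuBlasSgemm line) with m = 500 rows (the batch), n = 10 columns (the classes), and inner dimension k = 2048 = 128 * 4 * 4, i.e. the flattened 128 x 4 x 4 output of the last pooling stage. A naive CPU sketch with the same shapes (illustrative; the runtime calls cuBLAS and performs the bias add as the separate TensorAdd seen in the log):

#include <cstdio>
#include <vector>

int main() {
    // Same shapes as the logged FC layer; k is the flattened conv output.
    const int m = 500, n = 10, k = 128 * 4 * 4;  // k == 2048
    std::vector<float> x(m * k, 0.01f);   // activations, m x k
    std::vector<float> w(k * n, 0.01f);   // weights, k x n
    std::vector<float> b(n, 0.1f);        // bias, n
    std::vector<float> y(m * n);          // output, m x n -> 5000 elems, 20000 bytes
    for (int i = 0; i < m; ++i)
        for (int j = 0; j < n; ++j) {
            float acc = b[j];             // bias folded in here; the runtime
            for (int p = 0; p < k; ++p)   // applies it as a separate TensorAdd
                acc += x[i * k + p] * w[p * n + j];
            y[i * n + j] = acc;
        }
    std::printf("m = %d, n = %d, k = %d, output bytes = %zu\n",
                m, n, k, y.size() * sizeof(float));
    return 0;
}

The 20000-byte result tensor and the "Moving 20000 bytes from GPU to host" line both follow from m * n = 5000 fp32 elements.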
-INFO: current iteration time = 49.669377, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000465
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000438
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000435
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003545
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000259
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000268
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000250
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002791
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000128
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000128
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002915
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: m = 500, n = 10, k = 2048
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000073
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000043
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.978817, current iteration energy = 0.000000
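Each operator above is bracketed by AbsoluteTime entries (seconds since the Unix epoch) and a TimeDuration computed as end minus start. A generic C++ sketch of that pattern (not the profiler's actual code; for GPU kernels the end timestamp is only meaningful after a device synchronization):

#include <chrono>
#include <cstdio>

int main() {
    using clock = std::chrono::system_clock;
    auto start = clock::now();
    // ... launch the tensor op here; for GPU work, synchronize before
    // taking the end timestamp, or the duration measures only the launch ...
    auto end = clock::now();
    double abs_start = std::chrono::duration<double>(start.time_since_epoch()).count();
    double dur = std::chrono::duration<double>(end - start).count();
    std::printf("AbsoluteTime, Event = Add, Time = %f\n", abs_start);
    std::printf("TimeDuration, Event = Add_end, Time = %f\n", dur);
    return 0;
}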
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000474
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000445
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000462
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000437
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003547
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000232
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000250
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000232
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002790
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000128
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000160
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000124
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002931
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: m = 500, n = 10, k = 2048
-DEBUG: CuBlasSgemm *
-INFO: TimeDuration, Event = Mul_end, Time = 0.000071
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-INFO: *** TensorSoftmax
-INFO: Moving 20000 bytes from GPU to host
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000051
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
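The per-batch "****** Accuracy" line is plain top-1 accuracy over the batch_dim x num_classes softmax output: an argmax per row compared against the reference label, times 100. With 416 of 500 correct that yields 83.2, printed as 83.199997 through a float. A generic sketch (batchAccuracy is an illustrative name, not a tensor_runtime API):

#include <cstdio>
#include <vector>

// Generic top-1 accuracy: argmax per row vs. the reference label.
static float batchAccuracy(const std::vector<float>& out,
                           const std::vector<int>& labels,
                           int batch, int classes) {
    int correct = 0;
    for (int i = 0; i < batch; ++i) {
        int best = 0;
        for (int j = 1; j < classes; ++j)
            if (out[i * classes + j] > out[i * classes + best]) best = j;
        if (best == labels[i]) ++correct;
    }
    return 100.0f * correct / batch;  // e.g. 416/500 -> 83.199997 as printed
}

int main() {
    std::vector<float> out = {0.1f, 0.9f, 0.8f, 0.2f};  // two samples, two classes
    std::vector<int> labels = {1, 0};
    std::printf("Accuracy = %f\n", batchAccuracy(out, labels, 2, 2));
    return 0;
}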
-INFO: current iteration time = 50.126807, current iteration energy = 0.000000
-DEBUG: **** Freeing Output Tensors ***
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000469
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000435
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-INFO: TimeDuration, Event = Pool_end, Time = 0.003545
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000259
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000229
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000252
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-INFO: TimeDuration, Event = Pool_end, Time = 0.002785
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-INFO: FP32 BASELINE
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-INFO: TimeDuration, Event = Add_end, Time = 0.000160
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-INFO: TimeDuration, Event = Pool_end, Time = 0.002925
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event
= Mul, Time = 1607352781.533361 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.533433 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.533447 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.533468 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.533483 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.533526 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.053880, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.553746 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.554223 -INFO: TimeDuration, Event = Add_end, Time = 0.000478 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.554238 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.554675 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
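For anyone cross-checking the numbers in this trace: with data_format = 0 (NCHW) and FP32 elements, every stride and allocation size in the log follows from the tensor shape alone. A minimal sketch of that arithmetic, using our own helper names rather than anything from the HPVM runtime:

```cpp
#include <cassert>
#include <cstddef>

// Hypothetical helpers (our names, not the runtime's) reproducing the
// NCHW strides and FP32 byte sizes reported in the log above.
struct Dims { std::size_t n, c, h, w; };

constexpr std::size_t wStride(Dims)     { return 1; }          // w is contiguous
constexpr std::size_t hStride(Dims d)   { return d.w; }
constexpr std::size_t cStride(Dims d)   { return d.h * d.w; }
constexpr std::size_t nStride(Dims d)   { return d.c * d.h * d.w; }
constexpr std::size_t bytesFP32(Dims d) { return d.n * nStride(d) * sizeof(float); }

int main() {
  // Logged: nStride = 16384, cStride = 256, hStride = 16 and
  // size_in_bytes = 32768000 for a conv output of 500 x 64 x 16 x 16.
  Dims conv{500, 64, 16, 16};
  assert(nStride(conv) == 16384 && cStride(conv) == 256 && hStride(conv) == 16);
  assert(bytesFP32(conv) == 32768000);

  // Logged: "Moving 6144000 bytes from host to GPU" for the batch input,
  // consistent with 500 x 3 x 32 x 32 FP32 images.
  assert(bytesFP32(Dims{500, 3, 32, 32}) == 6144000);
  return 0;
}
```

The same identities reproduce every allocation line in the trace, e.g. 65536000 = 500 * 32 * 32 * 32 * 4 for the first conv layer's output.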
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.562248 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.562715 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.562732 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.563169 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.563184 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.566726 -INFO: TimeDuration, Event = Pool_end, Time = 0.003543 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.578175 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.578435 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.578452 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.578682 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.585651 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.585903 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.585917 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.586148 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.586161 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.588945 -INFO: TimeDuration, Event = Pool_end, Time = 0.002785 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.594338 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.594501 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.594515 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.594641 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.598484 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.598645 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.598658 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.598784 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.598797 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.601719 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352781.601739 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.601812 -INFO: TimeDuration, Event = Mul_end, Time = 0.000073 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.601825 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.601846 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.601860 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.601912 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000052 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 50.155274, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.625675 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.626146 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.626163 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.626604 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.634131 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.634597 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.634613 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.635051 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.635063 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.638609 -INFO: TimeDuration, Event = Pool_end, Time = 0.003546 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.650041 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.650301 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.650317 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.650550 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.657481 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.657731 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.657744 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.657974 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.657985 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.660777 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.666166 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.666328 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.666341 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.666466 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.670290 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.670450 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.670462 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.670586 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000124 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.670598 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.673525 -INFO: TimeDuration, Event = Pool_end, Time = 0.002927 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352781.673544 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.673616 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.673629 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.673651 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.673664 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.673714 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 49.992529, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.694202 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.694674 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.694689 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.695128 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.702652 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.703118 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.703132 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.703570 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.703585 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.707132 -INFO: TimeDuration, Event = Pool_end, Time = 0.003547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.718571 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.718853 -INFO: TimeDuration, Event = Add_end, Time = 0.000282 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.718871 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.719102 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.726015 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.726266 -INFO: TimeDuration, Event = Add_end, Time = 0.000251 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.726278 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.726507 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.726519 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.729310 -INFO: TimeDuration, Event = Pool_end, Time = 0.002790 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.734682 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.734845 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.734858 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.734987 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.738791 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.738952 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.738965 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.739090 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.739101 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.742024 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352781.742042 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.742112 -INFO: TimeDuration, Event = Mul_end, Time = 0.000070 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.742127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.742149 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.742162 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.742212 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 50.136134, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.763058 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.763530 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.763545 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.763986 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.771558 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.772023 -INFO: TimeDuration, Event = Add_end, Time = 0.000466 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.772041 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.772479 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.772490 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.776034 -INFO: TimeDuration, Event = Pool_end, Time = 0.003543 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.787462 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.787719 -INFO: TimeDuration, Event = Add_end, Time = 0.000257 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.787735 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.787966 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.794894 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.795146 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.795158 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.795388 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.795398 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.798190 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.803568 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.803729 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.803742 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.803868 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352781.807664 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.807824 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352781.807838 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352781.807964 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352781.807975 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352781.810896 -INFO: TimeDuration, Event = Pool_end, Time = 0.002922 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352781.810915
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352781.810986
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352781.810998
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352781.811019
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352781.811033
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352781.811083
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000050
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 49.906287, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
[... per-layer trace: six ConvLayer blocks (Conv, Add, Tanh, with Pool after blocks 2, 4, and 6) followed by the FCLayer GEMM, Add, and Softmax; identical in structure to the preceding iteration ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
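The stride and allocation figures repeated throughout this trace are self-consistent for contiguous NCHW float32 tensors: wStride = 1, hStride = W, cStride = H*W, nStride = C*H*W, and size_in_bytes = N*C*H*W*sizeof(float). A minimal standalone sketch, not HPVM runtime code, that reproduces the numbers for the 500x3x32x32 input batch:

```cpp
// Sketch only: recomputes the NCHW stride/size numbers printed in the trace.
#include <cstdio>

int main() {
  const long n = 500, c = 3, h = 32, w = 32;  // input batch in the trace
  const long wStride = 1;
  const long hStride = w;                      // 32
  const long cStride = h * w;                  // 1024
  const long nStride = c * h * w;              // 3072
  const long size_in_bytes = n * nStride * sizeof(float);  // 6144000
  // The 500x32x32x32 conv output likewise gives nStride = 32768,
  // cStride = 1024, and size_in_bytes = 65536000, as in the trace.
  std::printf("nStride = %ld, cStride = %ld, hStride = %ld, wStride = %ld\n",
              nStride, cStride, hStride, wStride);
  std::printf("size_in_bytes = %ld\n", size_in_bytes);
  return 0;
}
```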
-INFO: current iteration time = 49.888758, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... input transfer and per-layer trace, identical in structure to the iteration above ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.262645, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... input transfer and per-layer trace, identical in structure to the iteration above ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
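Every iteration ends with the same fully connected GEMM: m = 500 (batch), n = 10 (classes), k = 2048, where k is the flattened output of the final pooling stage (128 channels x 4 x 4), and the 20000-byte result is the 500x10 score tensor. Below is a plain CPU reference of that shape math; the runtime itself dispatches to cuBLAS SGEMM and applies the bias in a separate TensorAdd:

```cpp
// CPU reference sketch of the FCLayer shapes from the trace (not the
// runtime's code path): out[m*n] = x[m*k] * wt[k*n] + bias broadcast.
#include <vector>

void fc_forward(const std::vector<float>& x,   // m*k, row-major
                const std::vector<float>& wt,  // k*n, row-major
                const std::vector<float>& b,   // n, broadcast over rows
                std::vector<float>& out,       // m*n
                int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = b[j];
      for (int p = 0; p < k; ++p) acc += x[i * k + p] * wt[p * n + j];
      out[i * n + j] = acc;
    }
}

int main() {
  const int m = 500, n = 10, k = 2048;  // k = 128 * 4 * 4 after the last pool
  std::vector<float> x(m * k, 0.0f), wt(k * n, 0.0f), b(n, 0.0f), out(m * n);
  fc_forward(x, wt, b, out, m, n, k);  // out is the 500x10 (20000-byte) tensor
  return 0;
}
```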
-INFO: current iteration time = 50.385773, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... input transfer and per-layer trace, identical in structure to the iteration above ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.110609, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
[... input transfer and per-layer trace, identical in structure to the iteration above ...]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
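Each Accuracy line is a top-1 score over the 500x10 softmax output copied back to the host; 83.199997 above corresponds to 416 of 500 correct. A sketch under the assumption of uint8 labels, with a hypothetical helper name rather than the runtime's own:

```cpp
// Sketch of the end-of-iteration accuracy check: argmax each row of the
// batch_dim x num_classes host buffer and compare against the labels.
#include <cstdint>
#include <vector>

float top1_accuracy(const float* probs, const uint8_t* labels,
                    int batch_dim, int num_classes) {
  int correct = 0;
  for (int i = 0; i < batch_dim; ++i) {
    int best = 0;
    for (int j = 1; j < num_classes; ++j)
      if (probs[i * num_classes + j] > probs[i * num_classes + best]) best = j;
    if (best == labels[i]) ++correct;
  }
  return 100.0f * correct / batch_dim;  // 416/500 prints as 83.199997 in fp32
}

int main() {
  std::vector<float> probs(500 * 10, 0.1f);   // placeholder scores
  std::vector<uint8_t> labels(500, 0);        // placeholder labels
  return top1_accuracy(probs.data(), labels.data(), 500, 10) >= 0 ? 0 : 1;
}
```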
-INFO: current iteration time = 49.956923, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.176760 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.177231 -INFO: TimeDuration, Event = Add_end, Time = 0.000471 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.177247 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.177688 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.185265 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.185733 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.185749 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.186188 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000438 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.186201 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.189742
-INFO: TimeDuration, Event = Pool_end, Time = 0.003541
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.201211
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.201472
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.201488
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.201720
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000232
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.208693
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.208947
-INFO: TimeDuration, Event = Add_end, Time = 0.000255
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.208961
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.209191
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.209203
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.211988
-INFO: TimeDuration, Event = Pool_end, Time = 0.002785
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.217381
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.217543
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.217556
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.217684
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.221732
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.221894
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.221906
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.222033
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.222046
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.224965
-INFO: TimeDuration, Event = Pool_end, Time = 0.002919
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352782.224985
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.225057
-INFO: TimeDuration, Event = Mul_end, Time = 0.000072
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.225070
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.225092
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.225105
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.225154
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000049
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.307058, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.245752
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.246224
-INFO: TimeDuration, Event = Add_end, Time = 0.000472
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.246239
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.246680
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.255710
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.256173
-INFO: TimeDuration, Event = Add_end, Time = 0.000463
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.256189
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.256626
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000437
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.256644
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.260189
-INFO: TimeDuration, Event = Pool_end, Time = 0.003545
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.271621
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.271881
-INFO: TimeDuration, Event = Add_end, Time = 0.000260
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.271898
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.272127
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000229
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.279098
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.279351
-INFO: TimeDuration, Event = Add_end, Time = 0.000253
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.279365
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.279595
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000230
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.279619
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.282395
-INFO: TimeDuration, Event = Pool_end, Time = 0.002776
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.287780
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.287943
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.287956
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.288081
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000125
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.291901
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.292063
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.292076
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.292203
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.292215
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.295151
-INFO: TimeDuration, Event = Pool_end, Time = 0.002936
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352782.295169
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.295239
-INFO: TimeDuration, Event = Mul_end, Time = 0.000070
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.295253
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.295274
-INFO: TimeDuration, Event = Add_end, Time = 0.000021
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.295288
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.295352
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000064
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
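Each iteration ends with the same fully connected layer: TensorGemmGPU logs m = 500, n = 10, k = 2048, i.e. the 500-image batch of flattened 128x4x4 pool outputs (2048 features per image) is multiplied by a 2048x10 weight matrix, then the 10-element bias is added and softmax is applied, with the multiply dispatched to cuBLAS SGEMM. A reference CPU sketch of that shape (illustrative only; fc_forward is not an HPVM function):

#include <cstdio>
#include <vector>

// y[m x n] = x[m x k] * w[k x n] + bias[n], all row-major.
// With m = 500, n = 10, k = 2048 this matches the Mul/Add pair in the log.
void fc_forward(const std::vector<float>& x, const std::vector<float>& w,
                const std::vector<float>& bias, std::vector<float>& y,
                int m, int n, int k) {
  for (int i = 0; i < m; ++i)
    for (int j = 0; j < n; ++j) {
      float acc = bias[j];
      for (int p = 0; p < k; ++p)
        acc += x[i * k + p] * w[p * n + j];
      y[i * n + j] = acc;
    }
}

int main() {
  const int m = 500, n = 10, k = 2048;
  std::vector<float> x(m * k, 0.01f), w(k * n, 0.02f), bias(n, 0.0f), y(m * n);
  fc_forward(x, w, bias, y, m, n, k);
  std::printf("output elems = %zu (20000 bytes as FP32, as logged)\n", y.size());
  return 0;
}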
-INFO: current iteration time = 51.559673, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.315734
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.316196
-INFO: TimeDuration, Event = Add_end, Time = 0.000462
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.316210
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.316650
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.324228
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.324692
-INFO: TimeDuration, Event = Add_end, Time = 0.000465
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.324709
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.325148
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000439
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.325163
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.328866
-INFO: TimeDuration, Event = Pool_end, Time = 0.003704
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.340148
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.340407
-INFO: TimeDuration, Event = Add_end, Time = 0.000259
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.340422
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.340658
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000235
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.347634
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.347885
-INFO: TimeDuration, Event = Add_end, Time = 0.000251
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.347898
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.348129
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000231
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.348141
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.350937
-INFO: TimeDuration, Event = Pool_end, Time = 0.002796
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.356322
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.356484
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.356498
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.356625
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.360451
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.360612
-INFO: TimeDuration, Event = Add_end, Time = 0.000162
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.360626
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.360751
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.360763
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.363687
-INFO: TimeDuration, Event = Pool_end, Time = 0.002924
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352782.363706
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.363777
-INFO: TimeDuration, Event = Mul_end, Time = 0.000071
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.363791
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.363813
-INFO: TimeDuration, Event = Add_end, Time = 0.000022
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.363827
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.363877
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000050
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.599998
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.040010, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.384723
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.385198
-INFO: TimeDuration, Event = Add_end, Time = 0.000474
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.385214
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.385654
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.392962
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.393426
-INFO: TimeDuration, Event = Add_end, Time = 0.000465
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.393443
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.393880
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000438
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.393896
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.397433
-INFO: TimeDuration, Event = Pool_end, Time = 0.003537
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.408875
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.409136
-INFO: TimeDuration, Event = Add_end, Time = 0.000261
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.409153
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.409385
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000232
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 32768000
-DEBUG: Attempting to Allocate = 32768000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.416360
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 8192000
-INFO: bias->num_elems = 64
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.416612
-INFO: TimeDuration, Event = Add_end, Time = 0.000252
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.416624
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.416857
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000233
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.416869
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16
-DEBUG: ***--- size_in_bytes = 8192000
-DEBUG: Attempting to Allocate = 8192000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.419651
-INFO: TimeDuration, Event = Pool_end, Time = 0.002782
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.425041
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.425204
-INFO: TimeDuration, Event = Add_end, Time = 0.000163
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.425218
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.425344
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000127
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.429166
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 4096000
-INFO: bias->num_elems = 128
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.429327
-INFO: TimeDuration, Event = Add_end, Time = 0.000161
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.429342
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.429468
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000126
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.429479
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8
-DEBUG: ***--- size_in_bytes = 4096000
-DEBUG: Attempting to Allocate = 4096000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.432408
-INFO: TimeDuration, Event = Pool_end, Time = 0.002928
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for FCLayer
-INFO: *** TensorGemmGPU
-INFO: AbsoluteTime, Event = Mul, Time = 1607352782.432486
-INFO: rhs->dims.num_dims = 4
-INFO: lhs->dims.num_dims = 4
-INFO: m = 500, n = 10, k = 2048
-DEBUG: Creating new TENSOR *
-DEBUG: ***--- size_in_bytes = 20000
-DEBUG: Attempting to Allocate = 20000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1
-DEBUG: tensor->data_format = 0
-DEBUG: Changing placement *
-DEBUG: Changed Placement *
-
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: CuBlasSgemm *
-INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.432557
-INFO: TimeDuration, Event = Mul_end, Time = 0.000071
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.432570
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 5000
-INFO: bias->num_elems = 10
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.432592
-INFO: TimeDuration, Event = Add_end, Time = 0.000022
-INFO: No activation Function
-DEBUG: No data movement required - Data on Device
-INFO: *** TensorSoftmax
-INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.432606
-DEBUG: No data movement required - Data on Device
-INFO: Moving 20000 bytes from GPU to host
-INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.432649
-INFO: TimeDuration, Event = Softmax_end, Time = 0.000044
-DEBUG: No data movement required - Data on Host
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
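The TensorPooling lines record the output shape next to the input shape: dim1/dim2 are the input height/width and h/w the pooled result, so every pool in this network halves the spatial size (32 to 16, 16 to 8, 8 to 4), consistent with 2x2 max pooling at stride 2 (an assumption implied by the halving, not stated in the log). A small sketch of that size arithmetic, with the window and stride as assumed parameters:

#include <cstdio>

// Output side length of a pooling window with no padding.
long pooled(long in, long window, long stride) {
  return (in - window) / stride + 1;
}

int main() {
  const long batch = 500;
  const long channels[] = {32, 64, 128};
  const long inputs[]   = {32, 16, 8};     // dim1/dim2 values from the trace
  for (int i = 0; i < 3; ++i) {
    long out = pooled(inputs[i], 2, 2);    // 16, 8, 4 -- the h/w in the log
    long bytes = batch * channels[i] * out * out * (long)sizeof(float);
    std::printf("c = %ld: %ldx%ld -> %ldx%ld, size_in_bytes = %ld\n",
                channels[i], inputs[i], inputs[i], out, out, bytes);
  }
  return 0;  // prints 16384000, 8192000, 4096000 -- matching the allocations
}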
-INFO: current iteration time = 50.161508, current iteration energy = 0.000000
-
-DEBUG: **** Freeing Ouput Tensors ***
-DEBUG: ***--- size_in_bytes = 6144000
-DEBUG: Attempting to Allocate = 6144000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: Moving 6144000 bytes from host to GPU
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.453686
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.454152
-INFO: TimeDuration, Event = Add_end, Time = 0.000466
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.454169
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.454608
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000440
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: GPU Configuration for ConvLayer
-*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: FP32 BASELINE
-DEBUG: ***--- size_in_bytes = 65536000
-DEBUG: Attempting to Allocate = 65536000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1
-DEBUG: tensor->data_format = 0
-INFO: *** TensorAdd
-INFO: AbsoluteTime, Event = Add, Time = 1607352782.462188
-DEBUG: No data movement required - Data on Device
-DEBUG: No data movement required - Data on Device
-INFO: x->num_elems = 16384000
-INFO: bias->num_elems = 32
-INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.462651
-INFO: TimeDuration, Event = Add_end, Time = 0.000464
-INFO: *** TensorTanh
-INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.462669
-DEBUG: No data movement required - Data on Device
-INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.463104
-INFO: TimeDuration, Event = Tanh_end, Time = 0.000434
-INFO: *** TensorPooling
-INFO: AbsoluteTime, Event = Pool, Time = 1607352782.463115
-DEBUG: No data movement required - Data on Device
-DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32
-DEBUG: ***--- size_in_bytes = 16384000
-DEBUG: Attempting to Allocate = 16384000
-
-
-DEBUG: tensor->data_format = 0
-INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.466662 -INFO: TimeDuration, Event = Pool_end, Time = 0.003548 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.478106 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.478365 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.478380 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.478611 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.485581 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.485837 -INFO: TimeDuration, Event = Add_end, Time = 0.000255 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.485850 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.486080 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.486093 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.488875 -INFO: TimeDuration, Event = Pool_end, Time = 0.002783 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.494321 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.494485 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.494499 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.494625 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.498457 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.498618 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.498631 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.498756 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000125 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.498769 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.501691 -INFO: TimeDuration, Event = Pool_end, Time = 0.002923 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352782.501711 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.501780 -INFO: TimeDuration, Event = Mul_end, Time = 0.000069 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.501794 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.501815 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.501830 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.501881 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.269181, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.522955 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.523425 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.523440 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.523877 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000437 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
[repeated per-layer trace omitted; structure identical to the batch above]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 86.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.068627, current iteration energy = 0.000000
-
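The TensorAdd lines in each trace pair a large activation tensor with a small per-channel bias (for example x->num_elems = 16384000 against bias->num_elems = 32), i.e. the bias is broadcast over the batch and spatial dimensions. A CPU sketch of that broadcast under the same NCHW assumptions as above (illustrative only; the runtime runs the equivalent kernel on the GPU):

    #include <vector>
    #include <cstddef>

    // Broadcast-add a C-element bias over an N x C x H x W activation tensor.
    static void biasAdd(std::vector<float> &x, const std::vector<float> &bias,
                        size_t n, size_t c, size_t h, size_t w) {
      for (size_t i = 0; i < n; ++i)
        for (size_t j = 0; j < c; ++j) {
          float b = bias[j];                       // one bias value per channel
          for (size_t k = 0; k < h * w; ++k)       // reused across all pixels
            x[(i * c + j) * h * w + k] += b;
        }
    }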
[repeated per-layer trace omitted]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 85.400002
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.133871, current iteration energy = 0.000000
-
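In the TensorPooling lines, dim1/dim2 are the input spatial extents and h/w the pooled output (dim1 = dim2 = 16 shrinking to h = w = 8, then 8 shrinking to 4), which is consistent with a 2x2 max-pool of stride 2. A sketch of the shape arithmetic; the window and stride values are inferred from the trace, not read out of the runtime:

    #include <cstdio>

    // Output extent of a pooling window: floor((in - window) / stride) + 1.
    static int pooledExtent(int in, int window, int stride) {
      return (in - window) / stride + 1;
    }

    int main() {
      int h = pooledExtent(16, 2, 2); // -> 8, matching "h = 8 ... dim1 = 16"
      int w = pooledExtent(16, 2, 2); // -> 8
      std::printf("pooled output = %d x %d\n", h, w);
    }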
[repeated per-layer trace omitted]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.800003
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.142887, current iteration energy = 0.000000
-
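The FC layer is executed as one SGEMM (the CuBlasSgemm line): m = 500 batch rows times a 2048 x 10 weight matrix gives the 500 x 10 logits tensor, exactly the 20000 bytes of FP32 allocated in the trace. A naive row-major reference of the same m/n/k contraction; the runtime itself dispatches to cuBLAS rather than this loop:

    #include <vector>
    #include <cstddef>

    // C[m x n] = A[m x k] * B[k x n]; with m = 500, n = 10, k = 2048 this
    // reproduces the 500 x 10 (20000-byte) output tensor above.
    static void gemmRef(const std::vector<float> &A, const std::vector<float> &B,
                        std::vector<float> &C, size_t m, size_t n, size_t k) {
      for (size_t i = 0; i < m; ++i)
        for (size_t j = 0; j < n; ++j) {
          float acc = 0.0f;
          for (size_t p = 0; p < k; ++p)
            acc += A[i * k + p] * B[p * n + j];
          C[i * n + j] = acc;
        }
    }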
[repeated per-layer trace omitted]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 84.000000
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 51.094697, current iteration energy = 0.000000
-
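Each "****** Accuracy" line is a plain top-1 computation over the 500-image batch: after the 10-way softmax output is copied back to the host, the argmax of each row is compared against the label. A sketch, assuming labels are stored as one integer per image (an assumption; the benchmark's label format is not shown in this trace):

    #include <vector>
    #include <cstddef>

    // Top-1 accuracy over batch_dim rows of num_classes scores each,
    // as in "batch_dim = 500, num_classes = 10" above.
    static float top1Accuracy(const std::vector<float> &scores,
                              const std::vector<int> &labels,
                              size_t batch_dim, size_t num_classes) {
      size_t correct = 0;
      for (size_t i = 0; i < batch_dim; ++i) {
        size_t best = 0;
        for (size_t j = 1; j < num_classes; ++j)
          if (scores[i * num_classes + j] > scores[i * num_classes + best])
            best = j;
        if (static_cast<int>(best) == labels[i])
          ++correct;
      }
      return 100.0f * correct / batch_dim;
    }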
[repeated per-layer trace omitted]
-batch_dim = 500, num_classes = 10
-****** Accuracy = 83.199997
-
-DEBUG: findNextConfiguration: Updated configurationIdx to 0.
-DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2.
-DEBUG: findTargetConfiguration: Updated configurationIdx to 0.
-INFO: current iteration time = 50.016341, current iteration energy = 0.000000
-
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.881755 -INFO: TimeDuration, Event = Pool_end, Time = 0.003537 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.893205 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.893464 -INFO: TimeDuration, Event = Add_end, Time = 0.000259 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.893481 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.893711 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.900683 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.900935 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.900948 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.901178 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000229 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.901190 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.903981 -INFO: TimeDuration, Event = Pool_end, Time = 0.002791 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.909369 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.909532 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.909545 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.909673 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.913498 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.913658 -INFO: TimeDuration, Event = Add_end, Time = 0.000160 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.913671 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.913798 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.913810 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.916728 -INFO: TimeDuration, Event = Pool_end, Time = 0.002918 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352782.916748 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.916819 -INFO: TimeDuration, Event = Mul_end, Time = 0.000071 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.916832 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.916854 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.916868 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.916912 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000044 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 84.000000 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 50.050766, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.937754 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.938227 -INFO: TimeDuration, Event = Add_end, Time = 0.000473 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.938244 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.938684 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000440 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.946253 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.946717 -INFO: TimeDuration, Event = Add_end, Time = 0.000464 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.946734 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.947170 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000436 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.947185 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.950725 -INFO: TimeDuration, Event = Pool_end, Time = 0.003540 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.962179 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.962439 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.962456 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.962688 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.969687 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.969939 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.969952 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.970184 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000231 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.970196 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.972985 -INFO: TimeDuration, Event = Pool_end, Time = 0.002789 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.978390 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.978558 -INFO: TimeDuration, Event = Add_end, Time = 0.000168 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.978571 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.978697 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.982554 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.982715 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352782.982728 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352782.982854 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000126 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352782.982867 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352782.985788 -INFO: TimeDuration, Event = Pool_end, Time = 0.002920 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352782.985807 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352782.985878 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352782.985892 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352782.985914 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352782.985931 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352782.985983 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000052 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 85.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 50.291339, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.007325 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.007804 -INFO: TimeDuration, Event = Add_end, Time = 0.000479 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.007818 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.008257 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000439 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.015840 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.016312 -INFO: TimeDuration, Event = Add_end, Time = 0.000472 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.016326 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.016767 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.016782 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 
-DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.020329 -INFO: TimeDuration, Event = Pool_end, Time = 0.003547 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.031762 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.032022 -INFO: TimeDuration, Event = Add_end, Time = 0.000260 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.032040 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.032270 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.039233 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.039484 -INFO: TimeDuration, Event = Add_end, Time = 0.000250 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.039497 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.039726 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.039738 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.042530 -INFO: TimeDuration, Event = Pool_end, Time = 0.002792 -DEBUG: No data movement required - Data 
on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.047914 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.048077 -INFO: TimeDuration, Event = Add_end, Time = 0.000163 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.048091 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.048217 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.052049 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.052211 -INFO: TimeDuration, Event = Add_end, Time = 0.000162 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.052224 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.052353 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000129 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.052465 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.055285 -INFO: TimeDuration, Event = Pool_end, Time = 0.002820 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event 
= Mul, Time = 1607352783.055303 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352783.055375 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.055389 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.055411 -INFO: TimeDuration, Event = Add_end, Time = 0.000022 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352783.055425 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352783.055475 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000050 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 83.599998 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. -INFO: current iteration time = 49.971234, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -DEBUG: ***--- size_in_bytes = 6144000 -DEBUG: Attempting to Allocate = 6144000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 3072, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: Moving 6144000 bytes from host to GPU -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.076248 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.076717 -INFO: TimeDuration, Event = Add_end, Time = 0.000469 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.076733 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.077178 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000445 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on 
Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 65536000 -DEBUG: Attempting to Allocate = 65536000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 32768, cStride = 1024, hStride = 32, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.084746 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 16384000 -INFO: bias->num_elems = 32 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.085214 -INFO: TimeDuration, Event = Add_end, Time = 0.000468 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.085230 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.085671 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000441 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.085685 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 32, h = 16, w = 16 , dim1 = 32 , dim2 = 32 -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.089224 -INFO: TimeDuration, Event = Pool_end, Time = 0.003539 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.100988 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.101249 -INFO: TimeDuration, Event = Add_end, Time = 0.000261 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.101267 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.101498 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000232 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data 
movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 32768000 -DEBUG: Attempting to Allocate = 32768000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 16384, cStride = 256, hStride = 16, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.108495 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 8192000 -INFO: bias->num_elems = 64 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.108747 -INFO: TimeDuration, Event = Add_end, Time = 0.000252 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.108761 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.108991 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000230 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.109003 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 64, h = 8, w = 8 , dim1 = 16 , dim2 = 16 -DEBUG: ***--- size_in_bytes = 8192000 -DEBUG: Attempting to Allocate = 8192000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 4096, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.111791 -INFO: TimeDuration, Event = Pool_end, Time = 0.002788 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.117208 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.117372 -INFO: TimeDuration, Event = Add_end, Time = 0.000164 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.117386 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.117516 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000131 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for ConvLayer -*** Convolution - ApproxChoice = 2 - BatchNorm = 1 - CONV = 2 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: 
tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: FP32 BASELINE -DEBUG: ***--- size_in_bytes = 16384000 -DEBUG: Attempting to Allocate = 16384000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 8192, cStride = 64, hStride = 8, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.121344 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 4096000 -INFO: bias->num_elems = 128 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.121505 -INFO: TimeDuration, Event = Add_end, Time = 0.000161 -INFO: *** TensorTanh -INFO: AbsoluteTime, Event = Tanh, Time = 1607352783.121517 -DEBUG: No data movement required - Data on Device -INFO: AbsoluteTime, Event = Tanh_end, Time = 1607352783.121644 -INFO: TimeDuration, Event = Tanh_end, Time = 0.000127 -INFO: *** TensorPooling -INFO: AbsoluteTime, Event = Pool, Time = 1607352783.121656 -DEBUG: No data movement required - Data on Device -DEBUG: n = 500, c = 128, h = 4, w = 4 , dim1 = 8 , dim2 = 8 -DEBUG: ***--- size_in_bytes = 4096000 -DEBUG: Attempting to Allocate = 4096000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 2048, cStride = 16, hStride = 4, wStride = 1 -DEBUG: tensor->data_format = 0 -INFO: AbsoluteTime, Event = Pool_end, Time = 1607352783.124577 -INFO: TimeDuration, Event = Pool_end, Time = 0.002921 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: GPU Configuration for FCLayer -INFO: *** TensorGemmGPU -INFO: AbsoluteTime, Event = Mul, Time = 1607352783.124597 -INFO: rhs->dims.num_dims = 4 -INFO: lhs->dims.num_dims = 4 -INFO: m = 500, n = 10, k = 2048 -DEBUG: Creating new TENSOR * -DEBUG: ***--- size_in_bytes = 20000 -DEBUG: Attempting to Allocate = 20000 - - -DEBUG: tensor->data_format = 0 -INFO: nStride = 10, cStride = 1, hStride = 1, wStride = 1 -DEBUG: tensor->data_format = 0 -DEBUG: Changing placement * -DEBUG: Changed Placement * - -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -DEBUG: CuBlasSgemm * -INFO: AbsoluteTime, Event = Mul_end, Time = 1607352783.124669 -INFO: TimeDuration, Event = Mul_end, Time = 0.000072 -INFO: *** TensorAdd -INFO: AbsoluteTime, Event = Add, Time = 1607352783.124682 -DEBUG: No data movement required - Data on Device -DEBUG: No data movement required - Data on Device -INFO: x->num_elems = 5000 -INFO: bias->num_elems = 10 -INFO: AbsoluteTime, Event = Add_end, Time = 1607352783.124703 -INFO: TimeDuration, Event = Add_end, Time = 0.000021 -INFO: No activation Function -DEBUG: No data movement required - Data on Device -INFO: *** TensorSoftmax -INFO: AbsoluteTime, Event = Softmax, Time = 1607352783.124717 -DEBUG: No data movement required - Data on Device -INFO: Moving 20000 bytes from GPU to host -INFO: AbsoluteTime, Event = Softmax_end, Time = 1607352783.124768 -INFO: TimeDuration, Event = Softmax_end, Time = 0.000051 -DEBUG: No data movement required - Data on Host -batch_dim = 500, num_classes = 10 -****** Accuracy = 86.800003 - -DEBUG: findNextConfiguration: Updated configurationIdx to 0. -DEBUG: findTargetConfiguration: goalVal: -0.060000, search kind: 2. -DEBUG: findTargetConfiguration: Updated configurationIdx to 0. 
-INFO: current iteration time = 50.430090, current iteration energy = 0.000000 - -DEBUG: **** Freeing Ouput Tensors *** -Exiting profiler -INFO: Writing Runtime Profile Info File... -INFO: Done writing profile. diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt deleted file mode 100644 index 9d6f975869964e8bb666262923172eac42a43151..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs.txt +++ /dev/null @@ -1,12 +0,0 @@ -2000 -+++++ -conf1 2.64294896823 0 84.24999995 -0.05999995000000524 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs2.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs2.txt deleted file mode 100644 index 8e4fae8e8ef42fb38c300efa8ccb8ab855561e43..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs2.txt +++ /dev/null @@ -1,4818 +0,0 @@ -+++++ -conf1 2.64294896823 0 84.24999995 -0.05999995000000524 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 1.545808927 0 83.99749985 0.19250015000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf3 2.16268436875 0 83.9749998 0.21500019999999154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf4 2.85243095593 0 85.582500225 -1.3925002250000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf5 2.88810451479 0 85.6749999 -1.4849999000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf6 1.91299290156 0 84.285000025 -0.09500002500000448 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf7 2.32152521417 0 84.037500025 0.1524999749999978 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 
gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf8 1.72139681434 0 84.2400001 -0.05000010000000543 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf9 1.34088900571 0 84.46999985 -0.27999984999999583 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf10 2.15667646025 0 84.467500075 -0.27750007500000606 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf11 2.0991051315 0 84.747499875 -0.5574998750000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf12 2.26270700467 0 83.95499985 0.23500015000000474 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf13 2.30515485402 0 84.2474996 -0.05749959999999987 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf14 2.6444723214 0 84.177500025 0.012499974999997221 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf15 1.72142485041 0 84.20249985 -0.012499849999997537 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf16 1.25940820746 0 84.400002 -0.2100020000000029 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf17 1.66795168964 0 84.86250055 -0.6725005499999952 -1 gpu conv perf 2 add fp16 1 
tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf18 1.18154248199 0 84.5449999 -0.3549998999999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf19 2.69527868518 0 84.082499475 0.10750052499999185 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf20 3.508798013 0 84.942500025 -0.7525000250000033 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf21 1.37857982221 0 84.415000575 -0.2250005749999957 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf22 2.04302762753 0 84.9100004 -0.7200004000000035 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf23 1.80326718101 0 83.8474996 0.3425003999999916 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf24 3.59584543185 0 84.917499975 -0.7274999750000006 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf25 1.68979688527 0 84.899999625 -0.7099996250000089 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf26 3.57472722452 0 85.085000125 -0.8950001249999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf27 1.93608746968 0 84.092500575 0.0974994249999952 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 
-2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf28 2.61505878203 0 84.652500925 -0.4625009250000005 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf29 2.80017803183 0 84.022500175 0.16749982499999305 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf30 1.77399722707 0 84.0 0.18999999999999773 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf31 1.95235606591 0 84.022500025 0.16749997499999836 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf32 1.68983290704 0 84.37500035 -0.1850003499999957 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf33 3.42702742371 0 84.99249975 -0.8024997499999955 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf34 1.68985992438 0 84.147499875 0.04250012500000366 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf35 1.79338784914 0 83.9824997 0.20750029999999242 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf36 2.18853801922 0 84.08249985 0.1075001499999928 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf37 1.94133991584 0 83.992500525 0.19749947500000076 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise 
swing_level 5 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf38 1.545808927 0 84.317500175 -0.12750017500000865 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf39 1.68016960419 0 84.8924999 -0.7024999000000065 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf40 1.813707661 0 84.347500225 -0.15750022500000682 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf41 2.30148997712 0 84.24999985 -0.05999984999999697 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf42 3.68309655183 0 84.5824999 -0.39249990000000423 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf43 3.45039880338 0 84.4624997 -0.27249969999999735 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf44 1.37705672429 0 83.852499575 0.33750042500000177 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf45 2.14805395337 0 84.0050001 0.184999899999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf46 2.05402977897 0 83.8524999 0.33750009999999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf47 1.70318365569 0 84.06500015 0.1249998499999947 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu 
conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf48 3.55780405071 0 84.007500125 0.18249987499999065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf49 1.83125102545 0 84.30250015 -0.11250015000000246 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf50 2.21893941807 0 84.0174997 0.17250029999999583 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf51 1.50399656712 0 83.837500325 0.35249967500000423 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf52 2.16303753646 0 84.2750006 -0.08500060000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf53 3.68773262256 0 84.557500525 -0.36750052499999697 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf54 1.70253070344 0 84.047499675 0.14250032500000032 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf55 1.21172403585 0 84.27750055 -0.08750055000000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf56 2.44121698807 0 83.880000375 0.3099996250000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf57 1.24213756356 0 84.9699997 -0.7799997000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv 
fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf58 1.77222897301 0 84.467500325 -0.27750032500000543 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf59 3.41400766118 0 85.0825005 -0.892500499999997 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf60 1.74981457921 0 84.0449997 0.14500029999999242 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf61 2.66802889905 0 84.069999875 0.12000012500000423 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf62 1.23404106904 0 85.1050003 -0.9150003000000027 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf63 2.5721723393 0 84.19500045 -0.0050004499999971586 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf64 3.52044764786 0 85.060000625 -0.870000625000003 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf65 2.78728442166 0 85.515000175 -1.3250001749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf66 2.76471189661 0 84.397499775 -0.20749977500000227 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf67 1.76326606859 0 84.2224997 -0.03249970000000246 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ 
-+++++ -conf68 2.81387959471 0 85.367499725 -1.177499725000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf69 2.34792427364 0 85.347500275 -1.1575002750000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf70 2.16444954053 0 84.389999975 -0.19999997499999722 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf71 2.71129466224 0 84.49500005 -0.30500005000000385 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf72 2.12126150431 0 84.472500125 -0.28250012499999855 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf73 1.64774149134 0 84.717500125 -0.5275001250000031 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf74 2.53877485855 0 84.037500075 0.15249992500000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf75 2.20062372112 0 84.109999475 0.08000052500000265 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf76 2.08193610951 0 84.70999965 -0.5199996500000026 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf77 2.76479890201 0 84.432499925 -0.24249992500000417 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf78 2.83029289665 0 85.63500095 -1.4450009500000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf79 1.67773859853 0 84.44999925 -0.259999250000007 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf80 1.49477759283 0 83.94749985 0.24250014999999792 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf81 3.55427260758 0 84.6400004 -0.4500004000000075 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf82 1.66690077651 0 84.64499965 -0.45499965000000486 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf83 2.5407202431 0 84.030000125 0.1599998749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf84 1.72467068675 0 85.365000325 -1.1750003249999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf85 3.68413346512 0 84.5499996 -0.35999960000000897 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf86 2.34426089698 0 85.3400005 -1.1500005000000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf87 1.5037937756 0 84.48750035 -0.2975003500000071 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf88 2.39067422722 0 84.052499975 0.13750002499999425 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf89 2.50707513602 0 83.980000125 0.20999987499999406 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf90 2.34799381894 0 85.31000005 -1.1200000500000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf91 2.66611715293 0 84.115000125 0.07499987500000316 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf92 3.20484476618 0 83.940000025 0.24999997499999438 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf93 1.69280257704 0 84.497500025 -0.30750002499999596 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf94 1.32318431888 0 83.8700001 0.3199998999999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf95 3.72287609902 0 84.807500325 -0.6175003250000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf96 3.25808614625 0 85.262499625 -1.072499625000006 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf97 2.01697928138 0 84.880000475 -0.690000475000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf98 3.57037076434 0 84.977500175 -0.7875001750000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu 
mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf99 3.86425755697 0 84.172499625 0.017500374999997348 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf100 2.70848729777 0 85.605000725 -1.4150007249999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf101 3.51194589165 0 84.83499975 -0.6449997499999967 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf102 3.83462006201 0 84.757499275 -0.567499275000003 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf103 2.18215937305 0 84.295000275 -0.10500027500000897 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf104 3.49011584637 0 84.977499975 -0.7874999750000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf105 4.06050228976 0 84.3225001 -0.13250010000000145 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf106 1.89032564161 0 83.905000125 0.2849998749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf107 4.30956760793 0 84.042499475 0.1475005249999981 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf108 3.55081672573 0 84.95249995 -0.7624999500000058 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf109 2.10324879556 0 84.179999775 0.010000224999998863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 
promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf110 3.29974923559 0 85.2299997 -1.0399996999999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf111 3.71617428087 0 84.604999725 -0.41499972500000126 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf112 1.88036454342 0 84.092500525 0.09749947499999223 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf113 1.32313131334 0 83.87500045 0.314999549999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf114 1.61009604666 0 84.1125004 0.0774995999999959 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf115 3.4913633548 0 84.9999997 -0.8099997000000059 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf116 2.46248494441 0 84.287499575 -0.0974995750000005 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf117 4.41513247874 0 84.150000225 0.039999774999998294 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf118 1.84459168829 0 84.1875 0.0024999999999977263 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf119 3.50857179065 0 85.134999625 -0.9449996250000083 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf120 1.80671337182 0 83.942499725 0.24750027499999305 -1 gpu 
conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf121 3.54854477527 0 84.694999875 -0.5049998749999958 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf122 1.56738321503 0 84.067500175 0.12249982499999135 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf123 1.87469737853 0 85.257500125 -1.0675001250000093 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf124 3.62227590611 0 84.637499625 -0.44749962500000606 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf125 1.71563522059 0 84.4225002 -0.23250020000000404 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf126 3.50848120632 0 85.07749955 -0.8874995500000011 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf127 3.86941789017 0 84.8150002 -0.6250002000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf128 1.91620722727 0 84.2499998 -0.059999799999999937 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf129 1.44196866861 0 84.097499875 0.09250012500000082 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf130 3.66732089277 0 84.7300001 -0.5400001000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 
add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf131 2.23154899479 0 84.0849998 0.1050001999999921 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf132 1.91952997835 0 83.957500475 0.23249952499999438 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf133 3.71350030483 0 84.65000005 -0.460000050000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf134 3.81225222884 0 84.762499425 -0.5724994250000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf135 4.19275534826 0 84.072499925 0.11750007499999526 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf136 2.61331302144 0 85.622499825 -1.4324998250000078 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf137 2.76999273759 0 85.6424995 -1.4524995000000018 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf138 2.3065330436 0 85.5024998 -1.3124997999999977 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf139 3.49100459066 0 85.157499875 -0.9674998750000015 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf140 3.48563196276 0 85.027499775 -0.8374997749999977 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf141 2.65322761859 0 83.162500275 
1.0274997249999984 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf142 2.65344076691 0 83.172500175 1.0174998250000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf143 2.18719023298 0 83.1900003 0.9999997000000036 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf144 2.60653577071 0 83.272499475 0.9175005249999941 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf145 4.46465646641 0 83.540000175 0.6499998249999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf146 2.15285399348 0 83.862499625 0.3275003749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf147 3.65175122588 0 83.2049996 0.9850004000000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf148 3.9660599653 0 83.552500175 0.6374998249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf149 4.53170414251 0 83.505000075 0.6849999249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf150 3.71617428087 0 85.1624994 -0.9724994000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf151 1.70998952611 0 84.2649998 -0.0749998000000005 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf152 2.64828400486 0 84.325000225 -0.13500022499999886 -1 
gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf153 2.13368332779 0 84.232500675 -0.04250067499999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf154 1.26929324517 0 84.4824997 -0.2924997000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf155 2.41947038317 0 83.31250035 0.8774996500000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf156 2.79853043416 0 83.234999475 0.9550005250000027 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf157 2.07817661079 0 85.492499525 -1.3024995250000018 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf158 2.32136581814 0 85.4974998 -1.3074998000000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf159 2.57879469037 0 83.847500025 0.3424999749999955 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf160 1.4878499709 0 84.120000275 0.0699997250000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf161 1.68769620605 0 84.239999575 -0.04999957500000107 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf162 2.50393185826 0 83.449999775 0.7400002250000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 
promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf163 2.68734365023 0 83.3925005 0.7974995000000007 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf164 1.98226632853 0 83.96000065 0.22999934999999994 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf165 1.78321495824 0 84.0299997 0.160000299999993 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf166 2.88456051936 0 83.067500125 1.1224998750000026 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf167 3.46456452848 0 83.1649998 1.0250001999999938 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf168 2.14610519779 0 83.92750015 0.26249984999999754 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf169 2.15539598712 0 83.96749955 0.2225004499999983 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf170 2.14272739582 0 84.882500075 -0.6925000749999981 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf171 2.1141534062 0 85.482500225 -1.2925002249999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf172 4.65121391718 0 83.61999925 0.5700007499999913 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf173 
1.90061509944 0 83.349999775 0.8400002249999972 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf174 1.65344959931 0 84.1374996 0.05250039999999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf175 2.41558217054 0 83.802499175 0.3875008250000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf176 1.69005187813 0 85.15749975 -0.9674997500000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf177 1.92025535635 0 84.01500035 0.17499965000000373 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf178 4.00517782251 0 83.68249965 0.5075003500000008 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf179 1.95590659474 0 84.4924997 -0.3024996999999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf180 4.16910464671 0 83.34249945 0.8475005499999924 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf181 3.60118640098 0 85.06749975 -0.8774997499999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf182 2.28437520771 0 85.487499975 -1.297499975000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf183 3.33515476381 0 83.3474995 0.8425004999999999 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 
1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf184 2.4412921699 0 83.73999985 0.45000014999999394 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf185 2.23810809251 0 83.697500125 0.4924998749999929 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf186 2.53508341283 0 83.97000005 0.21999995000000183 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf187 2.28301365028 0 83.904999975 0.2850000250000022 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf188 1.64297502615 0 83.485 0.7049999999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf189 3.36512250412 0 83.09750025 1.0924997500000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf190 3.72039948168 0 85.060000075 -0.8700000750000072 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf191 1.8677114471 0 85.1949999 -1.0049999000000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf192 3.34983276697 0 83.1325003 1.057499699999994 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf193 2.79708619899 0 84.067499525 0.12250047499999539 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ 
-conf194 2.77595110268 0 83.347500475 0.8424995249999938 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf195 1.91798747232 0 83.177499725 1.0125002749999936 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf196 3.02719788308 0 83.809999925 0.3800000749999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf197 2.59147181998 0 84.225000325 -0.03500032499999861 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf198 4.13802989146 0 83.572500075 0.6174999250000042 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf199 3.82439653195 0 83.82249965 0.36750035000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf200 2.00918601238 0 83.52749955 0.662500449999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf201 3.59724691324 0 84.894999725 -0.7049997250000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf202 1.55181168477 0 85.2625004 -1.0725003999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf203 2.51796807154 0 83.9549999 0.2350000999999935 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf204 1.93457438195 0 83.497500075 0.6924999249999928 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 
pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf205 2.79708619899 0 85.347499675 -1.1574996749999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf206 2.63716680826 0 83.817499125 0.37250087500000006 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf207 3.5413017311 0 84.825 -0.6350000000000051 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf208 2.14805395337 0 83.877499775 0.31250022499999375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf209 2.44534077265 0 83.677500375 0.5124996250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf210 2.0522306547 0 85.384999175 -1.1949991750000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf211 4.43114738736 0 83.40499975 0.785000249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf212 4.14598352007 0 83.56000045 0.6299995499999937 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf213 2.45681276337 0 83.007500125 1.1824998749999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf214 3.4621090664 0 84.95499985 -0.7649998499999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf215 1.98960444963 0 84.017500475 0.1724995249999921 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh 
fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf216 2.64194541602 0 83.87749995 0.31250004999999703 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf217 1.86100158987 0 83.245000075 0.9449999250000047 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf218 2.09726577998 0 84.119999225 0.0700007749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf219 2.50772231544 0 83.782500525 0.4074994749999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf220 2.46806682697 0 83.922500175 0.2674998250000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf221 3.46190747049 0 85.155000325 -0.9650003250000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf222 3.59715169228 0 85.040000075 -0.850000074999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf223 2.99394102561 0 83.1549999 1.0350000999999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf224 2.65199694711 0 84.2325001 -0.04250009999999804 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf225 3.03108712786 0 83.847500075 0.3424999249999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf226 
2.73775314445 0 85.5525001 -1.3625001000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf227 2.08638938557 0 84.6874996 -0.4974995999999976 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf228 2.28640917883 0 83.815000325 0.374999674999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf229 4.65496735922 0 83.4199998 0.7700001999999984 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf230 2.40101445801 0 83.637499375 0.5525006249999933 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf231 3.99006059078 0 83.519999275 0.6700007249999942 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf232 3.5153328867 0 85.199999375 -1.0099993750000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf233 2.30821836295 0 83.512499675 0.6775003249999969 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf234 3.42534336841 0 85.0649998 -0.8749997999999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf235 1.99098571083 0 83.950000225 0.23999977500000114 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf236 1.97184253078 0 83.597499625 0.5925003750000002 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 
gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf237 2.65253450847 0 84.117500325 0.0724996750000031 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf238 4.26552823649 0 83.7325007 0.45749929999999495 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf239 1.56116021343 0 83.567500525 0.6224994749999979 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf240 2.230065491 0 83.257500275 0.9324997249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf241 2.51796807154 0 83.33500005 0.8549999499999927 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf242 2.24875342151 0 85.01 -0.8200000000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf243 2.59512211904 0 84.0050006 0.18499939999999526 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf244 2.03674680878 0 84.125000175 0.06499982500000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf245 3.01536982747 0 83.177498925 1.012501075000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf246 2.77326672033 0 83.430000075 0.7599999250000025 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf247 3.9641852392 0 83.727500525 0.4624994750000013 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf248 3.60277274156 0 85.1350003 -0.9450003000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf249 3.1600407978 0 83.52249965 0.6675003499999974 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf250 3.19015352593 0 83.9724998 0.21750020000000347 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf251 2.08092928034 0 85.410000475 -1.2200004750000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf252 4.38699651938 0 83.800000025 0.38999997499999495 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf253 2.31554800621 0 83.66999985 0.5200001500000013 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf254 4.0571911997 0 83.607499675 0.582500324999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf255 1.5075741476 0 84.0800001 0.10999989999999116 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf256 4.35301697185 0 83.235000075 0.9549999249999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf257 1.39485319456 0 82.967500125 1.222499874999997 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ 
-+++++ -conf258 2.26632820691 0 83.1674999 1.022500100000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf259 2.24761177072 0 83.83749975 0.3525002499999914 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf260 2.7460388055 0 83.847500175 0.3424998250000044 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf261 2.74582786003 0 82.980000525 1.2099994750000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf262 4.71813705367 0 83.5074998 0.6825001999999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf263 3.70041824283 0 84.8150003 -0.6250002999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf264 1.5750823683 0 84.279999125 -0.08999912500000562 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf265 1.94319508586 0 83.869999875 0.32000012499999286 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf266 3.15601329684 0 83.81500015 0.3749998499999947 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf267 4.66417009743 0 83.357500425 0.8324995749999999 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf268 4.58407880356 0 83.4825001 0.707499900000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 
promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf269 1.93761018664 0 84.077500325 0.11249967499999514 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf270 1.46177875321 0 83.712500175 0.4774998249999953 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf271 3.34678026738 0 83.24250035 0.9474996499999975 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf272 2.85970649021 0 83.4374996 0.7525004000000024 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf273 4.08048673759 0 83.492499925 0.6975000749999936 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf274 2.72070478524 0 84.194999875 -0.004999874999995768 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf275 1.69179877983 0 84.50000005 -0.3100000499999993 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf276 4.59344226045 0 83.40999985 0.7800001499999922 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf277 2.81294567913 0 83.552500075 0.6374999250000002 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf278 3.69669372916 0 84.704999375 -0.5149993750000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf279 2.50732492616 0 83.950000575 0.2399994249999935 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf280 1.95243540846 0 84.10500005 0.08499994999999672 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf281 2.67302161229 0 84.019999425 0.1700005750000031 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf282 3.13207140491 0 84.072499575 0.1175004250000029 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf283 3.8160129289 0 84.967500375 -0.7775003750000025 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf284 1.75461103655 0 84.225000175 -0.035000175000007516 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf285 2.35908554337 0 83.3025001 0.8874998999999946 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf286 2.69747141415 0 84.0325002 0.15749979999999653 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf287 4.57851869679 0 83.43750015 0.7524998499999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf288 1.90058714913 0 83.8174999 0.37250009999999634 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf289 4.11033179261 0 83.72999995 0.4600000499999908 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf290 2.84063642 0 83.777499875 
0.412500124999994 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 4 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf291 1.94141836552 0 84.197500425 -0.0075004250000034745 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf292 1.83500358955 0 84.845000675 -0.6550006749999966 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf293 3.03625257388 0 83.610000375 0.5799996249999992 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf294 4.31574006815 0 83.25749945 0.9325005500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf295 2.23709260285 0 84.06000025 0.12999974999999608 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf296 1.74999999982 0 84.09249965 0.09750035000000423 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf297 3.74187152333 0 85.159999875 -0.9699998749999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf298 2.12490468203 0 83.18749975 1.002500249999997 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf299 4.55141371732 0 83.544999425 0.6450005749999974 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf300 2.5505253889 0 83.640000125 0.5499998749999975 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add 
fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf301 2.24042830922 0 82.605000175 1.584999824999997 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf302 2.99474495472 0 82.437499975 1.7525000250000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf303 1.83741042239 0 82.5424999 1.647500100000002 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf304 3.00314173594 0 82.372500125 1.8174998749999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf305 3.02855824782 0 82.602499925 1.5875000749999941 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf306 5.61856381092 0 82.71000045 1.4799995500000023 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf307 1.93345881243 0 82.8174997 1.3725002999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf308 3.89434526757 0 83.332500325 0.8574996749999997 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf309 3.2085926901 0 83.30000045 0.8899995499999989 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf310 2.65788202931 0 83.82000025 0.36999974999999097 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf311 3.80589962627 0 83.572500075 0.6174999250000042 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh 
fp32 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf312 5.86878107869 0 82.49500005 1.6949999499999961 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf313 4.18278212753 0 82.544999875 1.6450001249999957 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf314 2.79461037104 0 82.817499925 1.3725000749999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf315 2.06268071148 0 83.56000045 0.6299995499999937 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf316 3.14283005803 0 82.392499875 1.7975001249999991 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf317 2.90674893404 0 83.729999675 0.46000032499999577 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf318 5.71959200524 0 82.607499525 1.5825004750000033 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf319 3.2692515574 0 83.180000175 1.0099998249999942 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf320 3.95705043142 0 83.585000475 0.6049995249999967 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf321 1.78570672713 0 82.340000125 1.8499998749999946 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf322 2.48005024845 0 82.350000025 1.8399999749999978 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu 
softmax fp32 1 ------ -+++++ -conf323 3.79191778968 0 84.6900003 -0.5000002999999964 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf324 3.4043741587 0 83.0474996 1.142500400000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf325 3.80417060721 0 82.845000075 1.3449999249999962 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf326 3.26069885994 0 82.777499975 1.412500025 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf327 2.82134620097 0 83.242499825 0.9475001750000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf328 3.28309484905 0 83.217500075 0.9724999249999939 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf329 6.46142257104 0 82.4449995 1.7450005000000033 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf330 2.82907922695 0 82.920000125 1.2699998749999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf331 1.94130425897 0 83.192499325 0.9975006749999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf332 2.85648187962 0 82.979999725 1.2100002749999987 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf333 6.56415197275 0 82.767500725 1.4224992749999927 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf334 2.85655908058 0 82.9425002 1.2474998 -1 gpu conv perf 1 add fp16 
1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf335 3.35458479775 0 83.26500045 0.9249995499999955 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf336 1.4969077593 0 83.8175004 0.3724995999999976 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf337 1.94136764987 0 82.994999975 1.1950000249999988 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf338 6.51926617566 0 82.342499925 1.8475000749999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf339 2.85661912866 0 82.9450001 1.2449998999999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf340 6.88911932811 0 82.5275002 1.662499799999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf341 7.27771382595 0 82.409999825 1.7800001749999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf342 3.85388414421 0 82.8125 1.3774999999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf343 3.29733747307 0 82.53750035 1.6524996499999958 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf344 3.91901992783 0 82.702499375 1.4875006249999956 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf345 6.21182199304 0 82.372499675 1.8175003249999975 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 
promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf346 6.82308639954 0 82.4049993 1.7850006999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf347 2.86653584082 0 83.174999575 1.0150004249999967 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf348 2.85661912866 0 82.952500175 1.2374998250000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf349 4.72391028113 0 83.300000575 0.8899994249999992 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf350 3.44433248501 0 82.905000175 1.2849998249999999 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf351 2.89873719902 0 83.060000275 1.1299997250000047 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf352 2.88456051936 0 83.012499775 1.1775002250000028 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf353 1.54886128481 0 84.190000275 -2.7500000499003363e-07 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf354 2.74494531438 0 83.494999875 0.6950001249999929 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf355 1.93352169198 0 82.71249985 1.4775001499999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf356 3.22934629427 0 83.3875006 0.8024994000000021 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise 
swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf357 3.40560872216 0 82.70500005 1.4849999500000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf358 3.91974339469 0 82.512500225 1.6774997750000011 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf359 6.38098616976 0 82.740000725 1.4499992749999961 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf360 2.11646683273 0 83.324999825 0.8650001749999916 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf361 3.44433248501 0 83.304999925 0.8850000749999936 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf362 3.49997146664 0 82.747500125 1.4424998749999958 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf363 2.92518113588 0 82.717499725 1.4725002750000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf364 2.03533351941 0 84.20249995 -0.01249995000000581 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf365 2.72266716041 0 82.8350002 1.3549998000000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf366 3.46129945382 0 83.172499975 1.017500025000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf367 4.7579128906 0 83.34250055 0.8474994500000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise 
swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf368 2.89645117734 0 83.26749935 0.9225006500000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf369 7.05240539341 0 82.7375002 1.4524997999999982 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf370 4.20036519704 0 83.484999975 0.7050000250000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf371 2.92751275092 0 82.63750005 1.5524999499999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf372 2.94197024782 0 82.6324997 1.557500300000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf373 4.5420211194 0 82.57000075 1.6199992499999922 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf374 3.97254442813 0 82.50000025 1.6899997499999984 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf375 7.43572712186 0 82.41000015 1.7799998499999958 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf376 6.75830731775 0 82.5525 1.6375000000000028 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf377 3.52671235951 0 82.5575004 1.6324996000000027 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf378 3.90446543226 0 82.875 1.3149999999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf379 7.15165036228 0 82.502499975 
1.6875000249999914 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf380 2.93910064191 0 82.700000325 1.4899996749999929 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf381 3.76943901493 0 82.51499995 1.6750000499999942 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf382 4.12330429063 0 82.995000375 1.1949996249999941 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf383 7.02987984159 0 82.545000625 1.6449993749999976 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 5 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf384 1.9977292954 0 83.814999975 0.3750000249999914 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf385 3.52671235951 0 82.91249945 1.2775005499999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf386 2.88456051936 0 82.882500475 1.3074995249999972 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf387 2.29631826149 0 83.127500375 1.062499625000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf388 2.07414220093 0 82.894999475 1.2950005249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf389 2.91305391447 0 82.932500025 1.2574999750000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf390 1.71145642796 0 82.7799999 1.4100000999999907 -1 gpu conv perf 
1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf391 3.48503566282 0 83.125000375 1.0649996249999987 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf392 4.6046844451 0 83.337500325 0.8524996750000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf393 3.21327490919 0 82.3400005 1.8499994999999956 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf394 2.8594493929 0 83.954999425 0.23500057500000082 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf395 3.51243140488 0 82.5675 1.6225000000000023 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf396 2.11529906649 0 83.6874996 0.5025004000000024 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf397 2.82921385526 0 83.387499575 0.802500424999991 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf398 2.89873719902 0 83.16500035 1.024999649999998 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf399 2.70213491673 0 82.62250065 1.5674993499999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf400 5.55996025948 0 82.72000075 1.4699992500000008 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf401 2.13777021806 0 82.5700001 1.6199998999999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise 
swing_level 5 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf402 7.14924239914 0 82.640000175 1.5499998250000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf403 1.98428750071 0 82.944999925 1.245000074999993 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf404 2.84826407523 0 82.947500425 1.2424995749999965 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf405 2.0200446789 0 85.564999375 -1.3749993750000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf406 1.81793200125 0 83.600000425 0.5899995749999931 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf407 3.11156482501 0 82.515000775 1.6749992249999934 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf408 2.91048425212 0 83.79750005 0.39249995000000126 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf409 3.52612669152 0 82.715 1.4749999999999943 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf410 2.44490526971 0 83.2300001 0.9599998999999997 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf411 2.91305391447 0 82.55000005 1.6399999500000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf412 3.86832755871 0 
82.68750015 1.5024998499999924 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf413 2.64364069268 0 82.520000525 1.6699994749999973 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf414 3.52662083606 0 82.719999875 1.4700001249999985 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf415 2.18999469481 0 82.9924989 1.1975010999999967 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf416 2.23404784749 0 83.0999998 1.0900001999999915 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf417 5.89328407211 0 82.557500325 1.6324996749999912 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf418 2.10482440175 0 82.817499925 1.3725000749999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf419 1.92009721416 0 82.7125001 1.477499899999998 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf420 2.07642908914 0 82.80749945 1.3825005500000032 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf421 2.90906288284 0 83.17 1.019999999999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf422 2.31854954531 0 83.4899998 0.700000199999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise 
swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf423 2.18847759891 0 84.245000325 -0.05500032500000884 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf424 2.71547413931 0 83.147499725 1.0425002749999948 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf425 2.21249841041 0 82.297499825 1.892500174999995 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf426 2.39455654889 0 83.867499975 0.3225000249999965 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf427 6.5921297526 0 82.5725001 1.6174998999999985 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf428 3.48080578384 0 83.187500175 1.002499825000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf429 3.4844637541 0 83.152500025 1.037499975000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf430 2.7738545741 0 82.822499875 1.3675001249999923 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf431 3.43287705763 0 83.1475 1.042500000000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf432 3.23999006656 0 82.88750005 1.3024999499999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf433 5.89386830137 0 82.607500725 1.5824992750000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu 
softmax fp32 1 ------ -+++++ -conf434 6.56487679366 0 82.89250055 1.2974994500000037 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf435 1.78475666116 0 82.820000275 1.3699997249999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf436 6.38776489057 0 82.3024996 1.8875003999999933 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf437 2.25110095244 0 82.46499985 1.7250001499999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf438 3.94596728651 0 82.5174999 1.6725000999999935 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs_full.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs_full.txt deleted file mode 100644 index 8e4fae8e8ef42fb38c300efa8ccb8ab855561e43..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/data/tuner_confs_full.txt +++ /dev/null @@ -1,4818 +0,0 @@ -+++++ -conf1 2.64294896823 0 84.24999995 -0.05999995000000524 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 1.545808927 0 83.99749985 0.19250015000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf3 2.16268436875 0 83.9749998 0.21500019999999154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf4 2.85243095593 0 85.582500225 -1.3925002250000063 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf5 2.88810451479 0 85.6749999 -1.4849999000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise 
swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf6 1.91299290156 0 84.285000025 -0.09500002500000448 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf7 2.32152521417 0 84.037500025 0.1524999749999978 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf8 1.72139681434 0 84.2400001 -0.05000010000000543 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf9 1.34088900571 0 84.46999985 -0.27999984999999583 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf10 2.15667646025 0 84.467500075 -0.27750007500000606 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf11 2.0991051315 0 84.747499875 -0.5574998750000049 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf12 2.26270700467 0 83.95499985 0.23500015000000474 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf13 2.30515485402 0 84.2474996 -0.05749959999999987 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf14 2.6444723214 0 84.177500025 0.012499974999997221 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf15 1.72142485041 0 84.20249985 -0.012499849999997537 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 
-6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf16 1.25940820746 0 84.400002 -0.2100020000000029 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf17 1.66795168964 0 84.86250055 -0.6725005499999952 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf18 1.18154248199 0 84.5449999 -0.3549998999999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf19 2.69527868518 0 84.082499475 0.10750052499999185 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf20 3.508798013 0 84.942500025 -0.7525000250000033 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf21 1.37857982221 0 84.415000575 -0.2250005749999957 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf22 2.04302762753 0 84.9100004 -0.7200004000000035 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf23 1.80326718101 0 83.8474996 0.3425003999999916 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf24 3.59584543185 0 84.917499975 -0.7274999750000006 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf25 1.68979688527 0 84.899999625 -0.7099996250000089 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 
promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf26 3.57472722452 0 85.085000125 -0.8950001249999957 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf27 1.93608746968 0 84.092500575 0.0974994249999952 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf28 2.61505878203 0 84.652500925 -0.4625009250000005 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf29 2.80017803183 0 84.022500175 0.16749982499999305 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf30 1.77399722707 0 84.0 0.18999999999999773 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf31 1.95235606591 0 84.022500025 0.16749997499999836 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf32 1.68983290704 0 84.37500035 -0.1850003499999957 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf33 3.42702742371 0 84.99249975 -0.8024997499999955 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf34 1.68985992438 0 84.147499875 0.04250012500000366 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf35 1.79338784914 0 83.9824997 0.20750029999999242 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf36 2.18853801922 0 84.08249985 0.1075001499999928 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf37 1.94133991584 0 83.992500525 0.19749947500000076 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf38 1.545808927 0 84.317500175 -0.12750017500000865 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf39 1.68016960419 0 84.8924999 -0.7024999000000065 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf40 1.813707661 0 84.347500225 -0.15750022500000682 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf41 2.30148997712 0 84.24999985 -0.05999984999999697 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf42 3.68309655183 0 84.5824999 -0.39249990000000423 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf43 3.45039880338 0 84.4624997 -0.27249969999999735 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf44 1.37705672429 0 83.852499575 0.33750042500000177 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf45 2.14805395337 0 84.0050001 0.184999899999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 
gpu softmax fp32 1 ------ -+++++ -conf46 2.05402977897 0 83.8524999 0.33750009999999975 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf47 1.70318365569 0 84.06500015 0.1249998499999947 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf48 3.55780405071 0 84.007500125 0.18249987499999065 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf49 1.83125102545 0 84.30250015 -0.11250015000000246 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf50 2.21893941807 0 84.0174997 0.17250029999999583 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf51 1.50399656712 0 83.837500325 0.35249967500000423 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf52 2.16303753646 0 84.2750006 -0.08500060000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf53 3.68773262256 0 84.557500525 -0.36750052499999697 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf54 1.70253070344 0 84.047499675 0.14250032500000032 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf55 1.21172403585 0 84.27750055 -0.08750055000000145 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf56 
2.44121698807 0 83.880000375 0.3099996250000032 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf57 1.24213756356 0 84.9699997 -0.7799997000000047 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf58 1.77222897301 0 84.467500325 -0.27750032500000543 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf59 3.41400766118 0 85.0825005 -0.892500499999997 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf60 1.74981457921 0 84.0449997 0.14500029999999242 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf61 2.66802889905 0 84.069999875 0.12000012500000423 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf62 1.23404106904 0 85.1050003 -0.9150003000000027 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf63 2.5721723393 0 84.19500045 -0.0050004499999971586 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf64 3.52044764786 0 85.060000625 -0.870000625000003 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf65 2.78728442166 0 85.515000175 -1.3250001749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf66 2.76471189661 0 84.397499775 -0.20749977500000227 -1 gpu conv 
fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf67 1.76326606859 0 84.2224997 -0.03249970000000246 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf68 2.81387959471 0 85.367499725 -1.177499725000004 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf69 2.34792427364 0 85.347500275 -1.1575002750000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf70 2.16444954053 0 84.389999975 -0.19999997499999722 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf71 2.71129466224 0 84.49500005 -0.30500005000000385 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf72 2.12126150431 0 84.472500125 -0.28250012499999855 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf73 1.64774149134 0 84.717500125 -0.5275001250000031 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf74 2.53877485855 0 84.037500075 0.15249992500000076 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf75 2.20062372112 0 84.109999475 0.08000052500000265 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf76 2.08193610951 0 84.70999965 -0.5199996500000026 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv 
fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf77 2.76479890201 0 84.432499925 -0.24249992500000417 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf78 2.83029289665 0 85.63500095 -1.4450009500000078 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf79 1.67773859853 0 84.44999925 -0.259999250000007 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf80 1.49477759283 0 83.94749985 0.24250014999999792 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf81 3.55427260758 0 84.6400004 -0.4500004000000075 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf82 1.66690077651 0 84.64499965 -0.45499965000000486 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf83 2.5407202431 0 84.030000125 0.1599998749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf84 1.72467068675 0 85.365000325 -1.1750003249999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf85 3.68413346512 0 84.5499996 -0.35999960000000897 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf86 2.34426089698 0 85.3400005 -1.1500005000000044 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf87 1.5037937756 0 84.48750035 -0.2975003500000071 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf88 2.39067422722 0 84.052499975 0.13750002499999425 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf89 2.50707513602 0 83.980000125 0.20999987499999406 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf90 2.34799381894 0 85.31000005 -1.1200000500000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf91 2.66611715293 0 84.115000125 0.07499987500000316 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf92 3.20484476618 0 83.940000025 0.24999997499999438 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf93 1.69280257704 0 84.497500025 -0.30750002499999596 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf94 1.32318431888 0 83.8700001 0.3199998999999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf95 3.72287609902 0 84.807500325 -0.6175003250000088 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf96 3.25808614625 0 85.262499625 -1.072499625000006 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ 
-conf97 2.01697928138 0 84.880000475 -0.690000475000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf98 3.57037076434 0 84.977500175 -0.7875001750000052 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf99 3.86425755697 0 84.172499625 0.017500374999997348 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf100 2.70848729777 0 85.605000725 -1.4150007249999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf101 3.51194589165 0 84.83499975 -0.6449997499999967 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf102 3.83462006201 0 84.757499275 -0.567499275000003 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 3 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf103 2.18215937305 0 84.295000275 -0.10500027500000897 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf104 3.49011584637 0 84.977499975 -0.7874999750000029 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf105 4.06050228976 0 84.3225001 -0.13250010000000145 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf106 1.89032564161 0 83.905000125 0.2849998749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf107 4.30956760793 0 84.042499475 0.1475005249999981 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 
4 -8 gpu softmax fp32 1 ------ -+++++ -conf108 3.55081672573 0 84.95249995 -0.7624999500000058 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf109 2.10324879556 0 84.179999775 0.010000224999998863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf110 3.29974923559 0 85.2299997 -1.0399996999999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf111 3.71617428087 0 84.604999725 -0.41499972500000126 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf112 1.88036454342 0 84.092500525 0.09749947499999223 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 promise swing_level 6 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf113 1.32313131334 0 83.87500045 0.314999549999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf114 1.61009604666 0 84.1125004 0.0774995999999959 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf115 3.4913633548 0 84.9999997 -0.8099997000000059 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf116 2.46248494441 0 84.287499575 -0.0974995750000005 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf117 4.41513247874 0 84.150000225 0.039999774999998294 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf118 1.84459168829 0 84.1875 0.0024999999999977263 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 
pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf119 3.50857179065 0 85.134999625 -0.9449996250000083 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf120 1.80671337182 0 83.942499725 0.24750027499999305 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf121 3.54854477527 0 84.694999875 -0.5049998749999958 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf122 1.56738321503 0 84.067500175 0.12249982499999135 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf123 1.87469737853 0 85.257500125 -1.0675001250000093 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf124 3.62227590611 0 84.637499625 -0.44749962500000606 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf125 1.71563522059 0 84.4225002 -0.23250020000000404 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf126 3.50848120632 0 85.07749955 -0.8874995500000011 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf127 3.86941789017 0 84.8150002 -0.6250002000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf128 1.91620722727 0 84.2499998 -0.059999799999999937 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp32 1 
add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf129 1.44196866861 0 84.097499875 0.09250012500000082 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf130 3.66732089277 0 84.7300001 -0.5400001000000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf131 2.23154899479 0 84.0849998 0.1050001999999921 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf132 1.91952997835 0 83.957500475 0.23249952499999438 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf133 3.71350030483 0 84.65000005 -0.460000050000005 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf134 3.81225222884 0 84.762499425 -0.5724994250000037 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf135 4.19275534826 0 84.072499925 0.11750007499999526 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf136 2.61331302144 0 85.622499825 -1.4324998250000078 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf137 2.76999273759 0 85.6424995 -1.4524995000000018 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf138 2.3065330436 0 85.5024998 -1.3124997999999977 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf139 3.49100459066 0 85.157499875 -0.9674998750000015 -1 gpu conv fp32 1 add 
fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf140 3.48563196276 0 85.027499775 -0.8374997749999977 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf141 2.65322761859 0 83.162500275 1.0274997249999984 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf142 2.65344076691 0 83.172500175 1.0174998250000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf143 2.18719023298 0 83.1900003 0.9999997000000036 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf144 2.60653577071 0 83.272499475 0.9175005249999941 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf145 4.46465646641 0 83.540000175 0.6499998249999948 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf146 2.15285399348 0 83.862499625 0.3275003749999996 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf147 3.65175122588 0 83.2049996 0.9850004000000041 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf148 3.9660599653 0 83.552500175 0.6374998249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf149 4.53170414251 0 83.505000075 0.6849999249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf150 3.71617428087 0 85.1624994 -0.9724994000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf151 1.70998952611 0 84.2649998 -0.0749998000000005 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf152 2.64828400486 0 84.325000225 -0.13500022499999886 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf153 2.13368332779 0 84.232500675 -0.04250067499999943 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf154 1.26929324517 0 84.4824997 -0.2924997000000076 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf155 2.41947038317 0 83.31250035 0.8774996500000043 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf156 2.79853043416 0 83.234999475 0.9550005250000027 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf157 2.07817661079 0 85.492499525 -1.3024995250000018 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf158 2.32136581814 0 85.4974998 -1.3074998000000022 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf159 2.57879469037 0 83.847500025 0.3424999749999955 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf160 1.4878499709 0 84.120000275 0.0699997250000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv 
fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf161 1.68769620605 0 84.239999575 -0.04999957500000107 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf162 2.50393185826 0 83.449999775 0.7400002250000028 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf163 2.68734365023 0 83.3925005 0.7974995000000007 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf164 1.98226632853 0 83.96000065 0.22999934999999994 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf165 1.78321495824 0 84.0299997 0.160000299999993 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf166 2.88456051936 0 83.067500125 1.1224998750000026 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf167 3.46456452848 0 83.1649998 1.0250001999999938 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf168 2.14610519779 0 83.92750015 0.26249984999999754 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf169 2.15539598712 0 83.96749955 0.2225004499999983 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf170 2.14272739582 0 84.882500075 -0.6925000749999981 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf171 2.1141534062 0 85.482500225 -1.2925002249999977 -1 gpu conv fp16 1 
add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf172 4.65121391718 0 83.61999925 0.5700007499999913 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf173 1.90061509944 0 83.349999775 0.8400002249999972 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf174 1.65344959931 0 84.1374996 0.05250039999999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf175 2.41558217054 0 83.802499175 0.3875008250000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf176 1.69005187813 0 85.15749975 -0.9674997500000018 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf177 1.92025535635 0 84.01500035 0.17499965000000373 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf178 4.00517782251 0 83.68249965 0.5075003500000008 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf179 1.95590659474 0 84.4924997 -0.3024996999999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf180 4.16910464671 0 83.34249945 0.8475005499999924 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf181 3.60118640098 0 85.06749975 -0.8774997499999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise 
swing_level 3 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf182 2.28437520771 0 85.487499975 -1.297499975000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf183 3.33515476381 0 83.3474995 0.8425004999999999 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf184 2.4412921699 0 83.73999985 0.45000014999999394 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf185 2.23810809251 0 83.697500125 0.4924998749999929 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf186 2.53508341283 0 83.97000005 0.21999995000000183 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf187 2.28301365028 0 83.904999975 0.2850000250000022 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf188 1.64297502615 0 83.485 0.7049999999999983 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf189 3.36512250412 0 83.09750025 1.0924997500000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf190 3.72039948168 0 85.060000075 -0.8700000750000072 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf191 1.8677114471 0 85.1949999 -1.0049999000000014 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 4 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf192 3.34983276697 0 83.1325003 1.057499699999994 -1 gpu 
conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf193 2.79708619899 0 84.067499525 0.12250047499999539 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf194 2.77595110268 0 83.347500475 0.8424995249999938 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf195 1.91798747232 0 83.177499725 1.0125002749999936 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf196 3.02719788308 0 83.809999925 0.3800000749999981 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf197 2.59147181998 0 84.225000325 -0.03500032499999861 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf198 4.13802989146 0 83.572500075 0.6174999250000042 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf199 3.82439653195 0 83.82249965 0.36750035000000025 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf200 2.00918601238 0 83.52749955 0.662500449999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf201 3.59724691324 0 84.894999725 -0.7049997250000075 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf202 1.55181168477 0 85.2625004 -1.0725003999999956 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 
1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf203 2.51796807154 0 83.9549999 0.2350000999999935 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf204 1.93457438195 0 83.497500075 0.6924999249999928 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf205 2.79708619899 0 85.347499675 -1.1574996749999968 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf206 2.63716680826 0 83.817499125 0.37250087500000006 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf207 3.5413017311 0 84.825 -0.6350000000000051 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf208 2.14805395337 0 83.877499775 0.31250022499999375 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf209 2.44534077265 0 83.677500375 0.5124996250000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf210 2.0522306547 0 85.384999175 -1.1949991750000066 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf211 4.43114738736 0 83.40499975 0.785000249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf212 4.14598352007 0 83.56000045 0.6299995499999937 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf213 2.45681276337 0 83.007500125 1.1824998749999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise 
swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf214 3.4621090664 0 84.95499985 -0.7649998499999953 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf215 1.98960444963 0 84.017500475 0.1724995249999921 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf216 2.64194541602 0 83.87749995 0.31250004999999703 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf217 1.86100158987 0 83.245000075 0.9449999250000047 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf218 2.09726577998 0 84.119999225 0.0700007749999969 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf219 2.50772231544 0 83.782500525 0.4074994749999945 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf220 2.46806682697 0 83.922500175 0.2674998250000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf221 3.46190747049 0 85.155000325 -0.9650003250000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf222 3.59715169228 0 85.040000075 -0.850000074999997 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf223 2.99394102561 0 83.1549999 1.0350000999999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf224 2.65199694711 0 84.2325001 -0.04250009999999804 -1 gpu conv perf 2 add fp16 
1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf225 3.03108712786 0 83.847500075 0.3424999249999985 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf226 2.73775314445 0 85.5525001 -1.3625001000000054 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf227 2.08638938557 0 84.6874996 -0.4974995999999976 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf228 2.28640917883 0 83.815000325 0.374999674999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf229 4.65496735922 0 83.4199998 0.7700001999999984 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf230 2.40101445801 0 83.637499375 0.5525006249999933 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf231 3.99006059078 0 83.519999275 0.6700007249999942 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf232 3.5153328867 0 85.199999375 -1.0099993750000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf233 2.30821836295 0 83.512499675 0.6775003249999969 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf234 3.42534336841 0 85.0649998 -0.8749997999999977 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 7 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf235 1.99098571083 0 
83.950000225 0.23999977500000114 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf236 1.97184253078 0 83.597499625 0.5925003750000002 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf237 2.65253450847 0 84.117500325 0.0724996750000031 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf238 4.26552823649 0 83.7325007 0.45749929999999495 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf239 1.56116021343 0 83.567500525 0.6224994749999979 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf240 2.230065491 0 83.257500275 0.9324997249999996 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf241 2.51796807154 0 83.33500005 0.8549999499999927 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf242 2.24875342151 0 85.01 -0.8200000000000074 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf243 2.59512211904 0 84.0050006 0.18499939999999526 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf244 2.03674680878 0 84.125000175 0.06499982500000101 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf245 3.01536982747 0 83.177498925 1.012501075000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf246 2.77326672033 0 83.430000075 0.7599999250000025 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf247 3.9641852392 0 83.727500525 0.4624994750000013 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf248 3.60277274156 0 85.1350003 -0.9450003000000038 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf249 3.1600407978 0 83.52249965 0.6675003499999974 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf250 3.19015352593 0 83.9724998 0.21750020000000347 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf251 2.08092928034 0 85.410000475 -1.2200004750000062 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf252 4.38699651938 0 83.800000025 0.38999997499999495 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf253 2.31554800621 0 83.66999985 0.5200001500000013 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf254 4.0571911997 0 83.607499675 0.582500324999998 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf255 1.5075741476 0 84.0800001 0.10999989999999116 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf256 4.35301697185 0 83.235000075 0.9549999249999956 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 
-3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf257 1.39485319456 0 82.967500125 1.222499874999997 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf258 2.26632820691 0 83.1674999 1.022500100000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf259 2.24761177072 0 83.83749975 0.3525002499999914 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf260 2.7460388055 0 83.847500175 0.3424998250000044 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf261 2.74582786003 0 82.980000525 1.2099994750000036 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf262 4.71813705367 0 83.5074998 0.6825001999999927 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf263 3.70041824283 0 84.8150003 -0.6250002999999964 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf264 1.5750823683 0 84.279999125 -0.08999912500000562 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf265 1.94319508586 0 83.869999875 0.32000012499999286 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf266 3.15601329684 0 83.81500015 0.3749998499999947 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 
promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf267 4.66417009743 0 83.357500425 0.8324995749999999 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf268 4.58407880356 0 83.4825001 0.707499900000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf269 1.93761018664 0 84.077500325 0.11249967499999514 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf270 1.46177875321 0 83.712500175 0.4774998249999953 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf271 3.34678026738 0 83.24250035 0.9474996499999975 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 6 -6 promise swing_level 7 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf272 2.85970649021 0 83.4374996 0.7525004000000024 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf273 4.08048673759 0 83.492499925 0.6975000749999936 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 3 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf274 2.72070478524 0 84.194999875 -0.004999874999995768 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf275 1.69179877983 0 84.50000005 -0.3100000499999993 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf276 4.59344226045 0 83.40999985 0.7800001499999922 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf277 2.81294567913 0 83.552500075 0.6374999250000002 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 
gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf278 3.69669372916 0 84.704999375 -0.5149993750000021 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf279 2.50732492616 0 83.950000575 0.2399994249999935 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf280 1.95243540846 0 84.10500005 0.08499994999999672 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf281 2.67302161229 0 84.019999425 0.1700005750000031 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf282 3.13207140491 0 84.072499575 0.1175004250000029 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf283 3.8160129289 0 84.967500375 -0.7775003750000025 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf284 1.75461103655 0 84.225000175 -0.035000175000007516 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf285 2.35908554337 0 83.3025001 0.8874998999999946 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 3 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf286 2.69747141415 0 84.0325002 0.15749979999999653 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf287 4.57851869679 0 83.43750015 0.7524998499999924 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf288 1.90058714913 0 83.8174999 0.37250009999999634 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 
1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf289 4.11033179261 0 83.72999995 0.4600000499999908 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf290 2.84063642 0 83.777499875 0.412500124999994 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 6 -5 promise swing_level 4 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf291 1.94141836552 0 84.197500425 -0.0075004250000034745 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 4 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf292 1.83500358955 0 84.845000675 -0.6550006749999966 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf293 3.03625257388 0 83.610000375 0.5799996249999992 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 7 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf294 4.31574006815 0 83.25749945 0.9325005500000003 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 6 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf295 2.23709260285 0 84.06000025 0.12999974999999608 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf296 1.74999999982 0 84.09249965 0.09750035000000423 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf297 3.74187152333 0 85.159999875 -0.9699998749999992 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf298 2.12490468203 0 83.18749975 1.002500249999997 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 7 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 
-8 gpu softmax fp32 1 ------ -+++++ -conf299 4.55141371732 0 83.544999425 0.6450005749999974 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf300 2.5505253889 0 83.640000125 0.5499998749999975 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf301 2.24042830922 0 82.605000175 1.584999824999997 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf302 2.99474495472 0 82.437499975 1.7525000250000033 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf303 1.83741042239 0 82.5424999 1.647500100000002 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf304 3.00314173594 0 82.372500125 1.8174998749999958 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf305 3.02855824782 0 82.602499925 1.5875000749999941 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 5 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf306 5.61856381092 0 82.71000045 1.4799995500000023 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 7 -6 promise swing_level 5 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf307 1.93345881243 0 82.8174997 1.3725002999999987 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf308 3.89434526757 0 83.332500325 0.8574996749999997 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf309 3.2085926901 0 83.30000045 0.8899995499999989 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu 
softmax fp32 1 ------ -+++++ -conf310 2.65788202931 0 83.82000025 0.36999974999999097 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf311 3.80589962627 0 83.572500075 0.6174999250000042 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf312 5.86878107869 0 82.49500005 1.6949999499999961 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf313 4.18278212753 0 82.544999875 1.6450001249999957 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 5 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf314 2.79461037104 0 82.817499925 1.3725000749999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf315 2.06268071148 0 83.56000045 0.6299995499999937 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 6 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf316 3.14283005803 0 82.392499875 1.7975001249999991 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf317 2.90674893404 0 83.729999675 0.46000032499999577 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf318 5.71959200524 0 82.607499525 1.5825004750000033 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 6 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf319 3.2692515574 0 83.180000175 1.0099998249999942 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf320 3.95705043142 0 83.585000475 0.6049995249999967 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf321 1.78570672713 0 82.340000125 1.8499998749999946 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 
-2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf322 2.48005024845 0 82.350000025 1.8399999749999978 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf323 3.79191778968 0 84.6900003 -0.5000002999999964 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf324 3.4043741587 0 83.0474996 1.142500400000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf325 3.80417060721 0 82.845000075 1.3449999249999962 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf326 3.26069885994 0 82.777499975 1.412500025 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf327 2.82134620097 0 83.242499825 0.9475001750000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf328 3.28309484905 0 83.217500075 0.9724999249999939 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf329 6.46142257104 0 82.4449995 1.7450005000000033 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 7 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf330 2.82907922695 0 82.920000125 1.2699998749999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf331 1.94130425897 0 83.192499325 0.9975006749999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf332 2.85648187962 0 82.979999725 1.2100002749999987 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 
promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf333 6.56415197275 0 82.767500725 1.4224992749999927 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf334 2.85655908058 0 82.9425002 1.2474998 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf335 3.35458479775 0 83.26500045 0.9249995499999955 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf336 1.4969077593 0 83.8175004 0.3724995999999976 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 6 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf337 1.94136764987 0 82.994999975 1.1950000249999988 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf338 6.51926617566 0 82.342499925 1.8475000749999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf339 2.85661912866 0 82.9450001 1.2449998999999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf340 6.88911932811 0 82.5275002 1.662499799999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf341 7.27771382595 0 82.409999825 1.7800001749999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf342 3.85388414421 0 82.8125 1.3774999999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf343 3.29733747307 0 82.53750035 1.6524996499999958 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise 
swing_level 4 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf344 3.91901992783 0 82.702499375 1.4875006249999956 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf345 6.21182199304 0 82.372499675 1.8175003249999975 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 7 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf346 6.82308639954 0 82.4049993 1.7850006999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf347 2.86653584082 0 83.174999575 1.0150004249999967 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf348 2.85661912866 0 82.952500175 1.2374998250000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 4 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf349 4.72391028113 0 83.300000575 0.8899994249999992 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf350 3.44433248501 0 82.905000175 1.2849998249999999 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf351 2.89873719902 0 83.060000275 1.1299997250000047 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf352 2.88456051936 0 83.012499775 1.1775002250000028 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf353 1.54886128481 0 84.190000275 -2.7500000499003363e-07 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf354 2.74494531438 0 83.494999875 0.6950001249999929 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax 
fp32 1 ------ -+++++ -conf355 1.93352169198 0 82.71249985 1.4775001499999973 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf356 3.22934629427 0 83.3875006 0.8024994000000021 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 7 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf357 3.40560872216 0 82.70500005 1.4849999500000024 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf358 3.91974339469 0 82.512500225 1.6774997750000011 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf359 6.38098616976 0 82.740000725 1.4499992749999961 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf360 2.11646683273 0 83.324999825 0.8650001749999916 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf361 3.44433248501 0 83.304999925 0.8850000749999936 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf362 3.49997146664 0 82.747500125 1.4424998749999958 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf363 2.92518113588 0 82.717499725 1.4725002750000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf364 2.03533351941 0 84.20249995 -0.01249995000000581 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf365 2.72266716041 0 82.8350002 1.3549998000000016 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 
promise swing_level 4 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf366 3.46129945382 0 83.172499975 1.017500025000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf367 4.7579128906 0 83.34250055 0.8474994500000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf368 2.89645117734 0 83.26749935 0.9225006500000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf369 7.05240539341 0 82.7375002 1.4524997999999982 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf370 4.20036519704 0 83.484999975 0.7050000250000039 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf371 2.92751275092 0 82.63750005 1.5524999499999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf372 2.94197024782 0 82.6324997 1.557500300000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf373 4.5420211194 0 82.57000075 1.6199992499999922 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf374 3.97254442813 0 82.50000025 1.6899997499999984 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf375 7.43572712186 0 82.41000015 1.7799998499999958 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf376 6.75830731775 0 82.5525 1.6375000000000028 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf377 3.52671235951 0 82.5575004 1.6324996000000027 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 
3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf378 3.90446543226 0 82.875 1.3149999999999977 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf379 7.15165036228 0 82.502499975 1.6875000249999914 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf380 2.93910064191 0 82.700000325 1.4899996749999929 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf381 3.76943901493 0 82.51499995 1.6750000499999942 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 6 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf382 4.12330429063 0 82.995000375 1.1949996249999941 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf383 7.02987984159 0 82.545000625 1.6449993749999976 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 5 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf384 1.9977292954 0 83.814999975 0.3750000249999914 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf385 3.52671235951 0 82.91249945 1.2775005499999992 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 3 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf386 2.88456051936 0 82.882500475 1.3074995249999972 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf387 2.29631826149 0 83.127500375 1.062499625000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf388 2.07414220093 0 82.894999475 1.2950005249999919 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise 
swing_level 6 -5 promise swing_level 7 -6 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf389 2.91305391447 0 82.932500025 1.2574999750000018 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf390 1.71145642796 0 82.7799999 1.4100000999999907 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf391 3.48503566282 0 83.125000375 1.0649996249999987 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf392 4.6046844451 0 83.337500325 0.8524996750000042 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 5 -4 promise swing_level 4 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf393 3.21327490919 0 82.3400005 1.8499994999999956 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf394 2.8594493929 0 83.954999425 0.23500057500000082 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf395 3.51243140488 0 82.5675 1.6225000000000023 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 4 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf396 2.11529906649 0 83.6874996 0.5025004000000024 -1 gpu conv perf 2 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf397 2.82921385526 0 83.387499575 0.802500424999991 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf398 2.89873719902 0 83.16500035 1.024999649999998 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf399 2.70213491673 0 82.62250065 1.5674993499999914 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 
promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 5 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf400 5.55996025948 0 82.72000075 1.4699992500000008 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 6 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf401 2.13777021806 0 82.5700001 1.6199998999999963 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 5 -5 promise swing_level 5 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf402 7.14924239914 0 82.640000175 1.5499998250000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf403 1.98428750071 0 82.944999925 1.245000074999993 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf404 2.84826407523 0 82.947500425 1.2424995749999965 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf405 2.0200446789 0 85.564999375 -1.3749993750000016 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 6 -5 promise swing_level 7 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf406 1.81793200125 0 83.600000425 0.5899995749999931 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf407 3.11156482501 0 82.515000775 1.6749992249999934 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 5 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf408 2.91048425212 0 83.79750005 0.39249995000000126 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf409 3.52612669152 0 82.715 1.4749999999999943 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf410 2.44490526971 0 83.2300001 0.9599998999999997 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf411 2.91305391447 0 82.55000005 1.6399999500000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 4 -5 promise swing_level 3 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf412 3.86832755871 0 82.68750015 1.5024998499999924 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 6 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf413 2.64364069268 0 82.520000525 1.6699994749999973 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 4 -4 promise swing_level 5 -5 promise swing_level 6 -6 promise swing_level 6 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf414 3.52662083606 0 82.719999875 1.4700001249999985 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 4 -8 gpu softmax fp32 1 ------ -+++++ -conf415 2.18999469481 0 82.9924989 1.1975010999999967 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 5 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf416 2.23404784749 0 83.0999998 1.0900001999999915 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf417 5.89328407211 0 82.557500325 1.6324996749999912 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf418 2.10482440175 0 82.817499925 1.3725000749999907 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu conv fp32 1 add fp32 1 tanh fp32 1 -4 promise swing_level 3 -5 promise swing_level 4 -6 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf419 1.92009721416 0 82.7125001 1.477499899999998 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 5 -4 promise swing_level 5 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf420 2.07642908914 0 82.80749945 1.3825005500000032 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 promise swing_level 7 -6 gpu conv perf 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf421 2.90906288284 0 83.17 1.019999999999996 -1 gpu 
conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp32 1 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf422 2.31854954531 0 83.4899998 0.700000199999991 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 promise swing_level 7 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf423 2.18847759891 0 84.245000325 -0.05500032500000884 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 5 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 promise swing_level 6 -8 gpu softmax fp32 1 ------ -+++++ -conf424 2.71547413931 0 83.147499725 1.0425002749999948 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 6 -4 promise swing_level 5 -5 promise swing_level 3 -6 promise swing_level 6 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf425 2.21249841041 0 82.297499825 1.892500174999995 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu conv fp16 1 add fp16 1 tanh fp16 1 -4 promise swing_level 3 -5 gpu conv fp16 1 add fp16 1 tanh fp16 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf426 2.39455654889 0 83.867499975 0.3225000249999965 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 6 -3 promise swing_level 3 -4 promise swing_level 4 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 5 -8 gpu softmax fp32 1 ------ -+++++ -conf427 6.5921297526 0 82.5725001 1.6174998999999985 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 promise swing_level 4 -5 promise swing_level 4 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf428 3.48080578384 0 83.187500175 1.002499825000001 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf429 3.4844637541 0 83.152500025 1.037499975000003 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 7 -8 gpu softmax fp32 1 ------ -+++++ -conf430 2.7738545741 0 82.822499875 1.3675001249999923 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 gpu conv fp32 1 add fp32 1 tanh fp32 1 -6 promise swing_level 4 -7 gpu mul fp16 1 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf431 3.43287705763 0 83.1475 1.042500000000004 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf432 3.23999006656 0 
82.88750005 1.3024999499999979 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf433 5.89386830137 0 82.607500725 1.5824992750000035 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 7 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf434 6.56487679366 0 82.89250055 1.2974994500000037 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 5 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf435 1.78475666116 0 82.820000275 1.3699997249999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 promise swing_level 3 -4 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf436 6.38776489057 0 82.3024996 1.8875003999999933 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 -2 promise swing_level 7 -3 promise swing_level 3 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf437 2.25110095244 0 82.46499985 1.7250001499999996 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 3 -4 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 promise swing_level 3 -6 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ -+++++ -conf438 3.94596728651 0 82.5174999 1.6725000999999935 -1 gpu conv perf 1 add fp16 1 tanh fp16 1 -2 gpu conv perf 2 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 promise swing_level 4 -4 promise swing_level 3 -5 promise swing_level 3 -6 promise swing_level 4 -7 promise swing_level 3 -8 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/out-run-1 b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/out-run-1 deleted file mode 100644 index 9403664de162f84dcaa420755304e5c308af51e5..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/out-run-1 +++ /dev/null @@ -1 +0,0 @@ -run_dnn_frequency_exp.sh: line 28: ./alexnet2_loop_wrapperapi_linked: No such file or directory diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/predictive/alexnet2.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/predictive/alexnet2.txt deleted file mode 100644 index 6ec4a06d3dbd2e088d6db287d23dd3bd5aad7ddb..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet2/predictive/alexnet2.txt +++ /dev/null @@ -1,419 +0,0 @@ -1114.3009809999999 -+++++ -conf1 1 1 84.98 0.0 -1 gpu conv fp32 11 add fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 tanh fp32 1 -4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -5 gpu conv fp32 11 add fp32 1 tanh fp32 1 -6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 -7 gpu mul fp32 11 add fp32 1 -8 gpu softmax fp32 1 ------ -+++++ -conf2 2.4248748377353113 2.0815908534183163 84.5 0.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 
gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf3 2.4055188425519614 2.0586265720811823 84.48 0.5 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf4 2.4156140842962985 2.0617867479342706 84.28 0.7000000000000028 -1 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf5 2.396416918342732 2.0506214971794585 84.02 0.960000000000008 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 151 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf6 2.463002582910052 2.1171077568609458 83.84 1.1400000000000006 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf7 2.360283215266004 2.0255245321874304 83.78 1.2000000000000028 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf8 2.4140791541736157 2.0671513522247653 83.74000000000001 1.2399999999999949 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf9 2.457753689612079 2.1086250651240137 83.7 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf10 
2.459170454055443 2.1111925341396343 83.7 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 164 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf11 2.4135986141645764 2.060453960420927 83.62 1.3599999999999994 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf12 2.4631278039012106 2.1092094797926637 83.58 1.4000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf13 2.535761391794481 2.16998336112692 83.58 1.4000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf14 2.289006193945062 1.961240158652051 83.54 1.4399999999999977 -1 gpu conv perf_fp16 167 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf15 2.4257674844112573 2.0808440756495563 83.5 1.480000000000004 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 161 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf16 2.458122368488622 2.109531159729078 83.48 1.5 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf17 2.281072202152105 1.9539314420536427 83.46000000000001 1.519999999999996 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 
12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf18 2.4572171342078444 2.1088933553775697 83.46000000000001 1.519999999999996 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf19 2.3017607719030058 1.9782265708150768 83.42 1.5600000000000023 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf20 2.379206814483014 2.047909200292713 83.39999999999999 1.5800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 151 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf21 2.4636282705302537 2.1162281156388527 83.39999999999999 1.5800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf22 2.461590101374146 2.1108493881199184 83.22 1.7600000000000051 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 161 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf23 2.537054645442804 2.167568834938183 83.22 1.7600000000000051 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf24 2.4631604723407885 2.1099694757102845 83.17999999999999 1.8000000000000114 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf25 2.4636282705302537 2.1162281156388527 83.14 1.8400000000000034 -1 gpu conv fp16 11 
add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf26 2.462588899729088 2.109477918791931 83.14 1.8400000000000034 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf27 2.4638085754689025 2.1071960926343603 83.1 1.8800000000000097 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf28 2.4640079766123635 2.110326453157297 83.08 1.9000000000000057 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf29 2.459337622764853 2.107249218450713 83.06 1.9200000000000017 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf30 2.538176340059405 2.173287257415721 83.02000000000001 1.9599999999999937 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 164 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf31 2.3905426931959846 2.044333576277581 83.02000000000001 1.9599999999999937 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf32 2.459337622764853 2.107249218450713 83.0 1.980000000000004 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 
1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf33 2.458968579288317 2.1063450826631396 82.89999999999999 2.0800000000000125 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 163 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf34 2.2912974651603877 1.9670210508860688 82.8 2.180000000000007 -1 gpu conv perf_fp16 168 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 155 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf35 2.4648489763056327 2.113931670664391 82.66 2.3200000000000074 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf36 2.4599076869402854 2.1077397371200193 82.6 2.3800000000000097 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 153 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 162 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf37 2.4636282705302537 2.1162281156388527 82.54 2.4399999999999977 -1 gpu conv fp16 11 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 160 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ -+++++ -conf38 2.591814267389778 2.222680944458784 82.26 2.719999999999999 -1 gpu conv fp16 12 add fp16 1 tanh fp16 1 -2 gpu conv perf_fp16 154 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 -4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 -5 gpu conv fp16 12 add fp16 1 tanh fp16 1 -6 gpu conv perf_fp16 157 add fp16 1 tanh fp16 1 pool_max fp16 1 -7 gpu mul fp16 12 add fp16 1 -8 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/alexnet_imagenet_loss_123_batch420_dev_tuner_valid_fp16__soc.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/alexnet_imagenet_loss_123_batch420_dev_tuner_valid_fp16__soc.txt deleted file mode 100644 index 272df8e4d045a4c73c375fb662297c514e58239f..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/alexnet_imagenet_loss_123_batch420_dev_tuner_valid_fp16__soc.txt +++ /dev/null @@ -1,265 +0,0 @@ -750.80768325 -+++++ -conf1 1 1 55.86 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu conv fp32 
11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 relu fp32 1 -7 gpu mul fp32 11 add fp32 1 relu fp32 1 -8 gpu mul fp32 11 add fp32 1 -9 gpu softmax fp32 1 ------ -+++++ -conf2 1.667145773545935 1.3698797622099539 55.81 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf3 1.7087251624244644 1.398823413549252 55.81 0.04999999999999716 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf4 1.84864414271949 1.479057844495293 55.279999999999994 0.5800000000000054 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf5 1.800706832676418 1.4408815345970516 55.2 0.6599999999999966 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf6 1.8685574503823363 1.4891603919413798 55.16 0.7000000000000028 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf7 1.723135066135383 1.4115676136104214 55.14 0.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf8 1.8752464017460646 1.4969487004141868 55.059999999999995 0.8000000000000043 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv 
fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf9 1.8656914340360484 1.4920969855586097 54.58 1.2800000000000011 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf10 1.8752464017460646 1.4969487004141868 54.50000000000001 1.3599999999999923 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf11 1.8685574503823363 1.4891603919413798 54.44 1.4200000000000017 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf12 1.84864414271949 1.479057844495293 54.42 1.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf13 1.84864414271949 1.479057844495293 54.42 1.4399999999999977 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf14 1.8655222324604301 1.4933133992701588 54.36 1.5 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf15 1.8560509412688768 1.48465767083507 54.339999999999996 1.5200000000000031 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 
pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf16 1.84864414271949 1.479057844495293 54.279999999999994 1.5800000000000054 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf17 1.8732357449123018 1.4977961717105837 54.02 1.8399999999999963 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf18 1.8752464017460646 1.4969487004141868 53.959999999999994 1.9000000000000057 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf19 1.8752464017460646 1.4969487004141868 53.959999999999994 1.9000000000000057 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf20 1.9649407316946184 1.523499599635438 53.839999999999996 2.020000000000003 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf21 1.9649407316946184 1.523499599635438 53.839999999999996 2.020000000000003 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf22 1.8927901772122795 1.5103066197047974 53.82 2.039999999999999 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv fp16 11 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 
1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges.txt deleted file mode 100644 index 62a24fb72cb17f3c586d8097a31e1faab2e56845..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges.txt +++ /dev/null @@ -1,8 +0,0 @@ -0.0 255.0 0.5811487324237921 -0.5503702693581581 1.648145 -2.802485 0.0 1572.3096923828125 -0.0 1572.3096923828125 0.26272463005783797 -0.2867645202279091 0.501206 -0.47985682 0.0 3183.7813264160477 -0.0 3183.7813264160477 0.15785247704386754 -0.16606662392616273 0.5545839 -0.42038992 0.0 1765.4451872558668 -0.0 1765.4451872558668 0.11035470351576919 -0.10464580833911895 0.9042998 -1.4275751 0.0 1345.5418548586083 -0.0 1345.5418548586083 0.10250756608694818 -0.09240880391001702 2.4040315 -0.45662758 0.0 1227.3563232421875 -0.0 1227.3563232421875 0.02963459612801672 -0.030517672039568428 0.09377053 -0.07124679 0.0 1034.5966391601676 -0.0 1034.5966391601676 0.039147199764847845 -0.038392101023346184 0.1841282 -0.050027702 0.0 839.0697069702154 -0.0 839.0697069702154 0.08549865524470925 -0.05494491942599416 0.15416704 -0.16314922 -608.3993963623047 1082.8444653320819 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt deleted file mode 100644 index 897937563bac79bdc4592c6a6e7ce46e41e75920..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/quant_ranges_rt.txt +++ /dev/null @@ -1,10 +0,0 @@ -1 0.0 255.0 0.5811487324237921 -0.5503702693581581 1.648145 -2.802485 0.0 1572.3096923828125 -2 0.0 1572.3096923828125 0.26272463005783797 -0.2867645202279091 0.501206 -0.47985682 0.0 3183.7813264160477 -3 0.0 3183.7813264160477 0.15785247704386754 -0.16606662392616273 0.5545839 -0.42038992 0.0 1765.4451872558668 -4 0.0 1765.4451872558668 0.11035470351576919 -0.10464580833911895 0.9042998 -1.4275751 0.0 1345.5418548586083 -5 0.0 1345.5418548586083 0.10250756608694818 -0.09240880391001702 2.4040315 -0.45662758 0.0 1227.3563232421875 -6 0.0 1227.3563232421875 0.02963459612801672 -0.030517672039568428 0.09377053 -0.07124679 0.0 1034.5966391601676 -7 0.0 1034.5966391601676 0.039147199764847845 -0.038392101023346184 0.1841282 -0.050027702 0.0 839.0697069702154 -8 0.0 839.0697069702154 0.08549865524470925 -0.05494491942599416 0.15416704 -0.16314922 -608.3993963623047 1082.8444653320819 -9 0 0 0 0 0 0 0 0 - diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt deleted file mode 100644 index 377bc6a5628a5f869ccab9723838622afcbb210c..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/data/tuner_confs.txt +++ /dev/null @@ -1,13 +0,0 @@ -750.80768325 -+++++ -conf1 1.0 0 79.1 0.0 -1 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 1 add fp32 1 relu fp32 1 -4 gpu conv fp32 1 add fp32 1 relu fp32 1 -5 gpu conv fp32 1 add fp32 1 relu fp32 1 pool_max fp32 1 -6 gpu mul fp32 1 add fp32 1 relu fp32 1 -7 gpu mul fp32 1 add fp32 1 relu fp32 1 -8 gpu mul fp32 1 add fp32 1 -9 gpu softmax fp32 1 
------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/predictive/alexnet_imagenet.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/predictive/alexnet_imagenet.txt deleted file mode 100644 index b0e42a5aaa5d7b5a06b6422a5c33a0047b6eff8d..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/alexnet_imagenet/predictive/alexnet_imagenet.txt +++ /dev/null @@ -1,229 +0,0 @@ -2739.950736 -+++++ -conf1 1 1 56.3 0.0 -1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -3 gpu conv fp32 11 add fp32 1 relu fp32 1 -4 gpu conv fp32 11 add fp32 1 relu fp32 1 -5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 -6 gpu mul fp32 11 add fp32 1 relu fp32 1 -7 gpu mul fp32 11 add fp32 1 relu fp32 1 -8 gpu mul fp32 11 add fp32 1 -9 gpu softmax fp32 1 ------ -+++++ -conf2 1.802133644103582 1.8186433204507424 55.76 0.5399999999999991 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf3 1.7574572103878898 1.7673706184460103 55.58 0.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf4 2.0227701930718065 2.043112495268932 55.42 0.8799999999999955 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf5 1.9872634777043927 2.002789650227035 55.120000000000005 1.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf6 1.8204253918445088 1.843736069756362 54.84 1.4599999999999937 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf7 1.9308336510645352 1.934889049414224 54.74 1.5599999999999952 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu 
conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf8 2.0146435217865446 2.0367475358800102 54.58 1.7199999999999989 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv fp16 12 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf9 2.0101709494490696 2.0329911158023064 54.400000000000006 1.8999999999999915 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf10 2.0052132441967916 2.0284931705407003 54.300000000000004 1.999999999999993 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf11 2.010827434817262 2.036001862538864 54.2 2.0999999999999943 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 154 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf12 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf13 1.9923471030291253 2.009177323959059 54.120000000000005 2.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf14 1.9923471030291253 2.009177323959059 54.120000000000005 2.1799999999999926 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 
gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf15 2.028037341700216 2.049760395549724 54.0 2.299999999999997 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf16 1.9910730364852436 2.006510848093771 53.54 2.759999999999998 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf17 2.1567475543719614 2.159142310265706 53.300000000000004 2.999999999999993 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf18 2.1567475543719614 2.159142310265706 53.300000000000004 2.999999999999993 -1 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 12 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ -+++++ -conf19 2.0232690820426464 2.0527698121318476 53.300000000000004 2.999999999999993 -1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 -2 gpu conv perf_fp16 168 add fp16 1 relu fp16 1 pool_max fp16 1 -3 gpu conv fp16 11 add fp16 1 relu fp16 1 -4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 -5 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 -6 gpu mul fp16 12 add fp16 1 relu fp16 1 -7 gpu mul fp16 12 add fp16 1 relu fp16 1 -8 gpu mul fp16 12 add fp16 1 -9 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt deleted file mode 100644 index 0239f3aaf71c8132b4b3a593ca66fa4e10f813de..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_confs_batch220.txt +++ /dev/null @@ -1,3288 +0,0 @@ -+++++ -conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu mul fp32 1 add fp32 1 tanh fp32 1 -4 gpu mul fp32 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf1 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf2 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf3 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf4 1.72017310656 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf5 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf6 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf7 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf8 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf9 1.73515484904 0 99.580002 0.5099980000000045 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf10 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf11 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf12 1.77226558474 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf13 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max 
fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf14 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf15 1.7756263212 0 99.099998 0.8850029999999975 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf16 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf17 1.7756263212 0 99.260002 0.6449969999999965 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf18 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf19 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf20 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf21 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf22 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf23 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf24 1.72333900478 0 99.519997 0.5700029999999942 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf25 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf26 1.77226558474 0 99.339996 0.5250059999999976 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf27 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf28 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf29 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf30 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf31 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf32 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf33 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf34 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf35 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf36 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf37 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf38 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf39 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf40 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf41 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf42 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf43 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf44 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf45 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf46 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf47 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf48 1.75663902891 0 99.540001 0.549998999999994 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf49 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf50 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf51 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf52 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf53 1.51382277464 0 99.620003 0.4699970000000008 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf54 1.72017310656 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf55 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf56 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf57 1.7756263212 0 99.040001 0.974998499999991 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf58 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf59 1.77226558474 0 99.459999 0.6300010000000015 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf60 1.77226558474 0 99.18 0.7649999999999864 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf61 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf62 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 
1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf63 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf64 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf65 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf66 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf67 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf68 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf69 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf70 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf71 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf72 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf73 1.72333900478 0 99.480003 0.6099970000000013 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf74 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf75 1.77226558474 0 99.300003 0.5849954999999909 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf76 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf77 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf78 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf79 1.78817265464 0 99.519997 0.5700029999999942 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf80 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf81 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf82 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf83 1.77226558474 0 99.220001 0.7049985000000021 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf84 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf85 1.51137932951 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf86 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf87 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf88 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf89 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf90 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf91 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf92 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf93 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf94 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf95 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf96 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf97 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf98 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf99 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf100 1.51382277464 0 99.599998 0.4900019999999984 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf101 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf102 1.522932631 0 99.639999 0.45000099999999466 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf103 1.7756263212 0 99.360001 0.49499850000000123 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf104 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf105 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf106 1.77226558474 0 99.199997 0.7350045000000023 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf107 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf108 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf109 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf110 1.522932631 0 99.68 0.4099999999999909 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf111 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp32 1 ------ -+++++ -conf112 1.72333900478 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf113 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf114 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf115 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf116 1.72333900478 0 99.540001 0.549998999999994 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf117 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf118 1.78817265464 0 99.379997 0.46500449999999205 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf119 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf120 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf121 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf122 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf123 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf124 1.70544786131 0 99.620003 0.4699970000000008 
-1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf125 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf126 1.7756263212 0 99.379997 0.46500449999999205 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf127 1.51137932951 0 99.660004 0.42999599999999705 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf128 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf129 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf130 1.78817265464 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf131 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf132 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf133 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf134 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf135 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf136 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf137 1.77226558474 0 99.120003 0.8549955000000011 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf138 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf139 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf140 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf141 1.73515484904 0 99.459999 0.6300010000000015 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf142 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf143 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf144 1.7756263212 0 99.459999 0.6300010000000015 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf145 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf146 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf147 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf148 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 
1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf149 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf150 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf151 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf152 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf153 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf154 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf155 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf156 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf157 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf158 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf159 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf160 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf161 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf162 1.78817265464 0 99.059998 0.9450030000000069 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf163 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf164 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf165 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf166 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf167 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf168 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf169 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf170 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf171 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf172 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf173 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 23 add 
fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf174 1.75663902891 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf175 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf176 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf177 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf178 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf179 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf180 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf181 1.7756263212 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf182 1.78817265464 0 99.379997 0.46500449999999205 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf183 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf184 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf185 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf186 1.73515484904 0 99.599998 0.4900019999999984 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf187 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf188 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf189 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf190 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf191 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf192 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf193 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf194 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf195 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf196 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf197 1.77226558474 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf198 1.78817265464 0 99.080002 0.9149970000000067 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf199 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf200 1.7756263212 0 99.360001 0.49499850000000123 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf201 1.7756263212 0 99.199997 0.7350045000000023 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf202 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf203 1.77226558474 0 99.199997 0.7350045000000023 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf204 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf205 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf206 1.7756263212 0 99.099998 0.8850029999999975 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf207 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf208 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf209 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf210 
1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf211 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf212 1.77226558474 0 98.940002 1.1249969999999863 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf213 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf214 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf215 1.78817265464 0 99.620003 0.4699970000000008 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf216 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf217 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf218 1.51137932951 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf219 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf220 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf221 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf222 1.77226558474 0 99.5 0.5899999999999977 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 
pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf223 1.522932631 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf224 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf225 1.522932631 0 99.68 0.4099999999999909 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf226 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf227 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf228 1.77226558474 0 98.980003 1.064995500000002 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf229 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf230 1.7756263212 0 99.239998 0.6750029999999967 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf231 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf232 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf233 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf234 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 
1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf235 1.78817265464 0 99.559998 0.5300020000000046 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf236 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf237 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf238 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf239 1.77226558474 0 98.800003 1.334995499999991 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf240 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf241 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf242 1.78817265464 0 99.239998 0.6750029999999967 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf243 1.75663902891 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf244 1.72017310656 0 99.419998 0.670001999999991 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf245 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf246 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 
------ -+++++ -conf247 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf248 1.72333900478 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf249 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf250 1.51137932951 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf251 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf252 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf253 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf254 1.72017310656 0 97.860001 2.7449985000000012 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf255 1.72333900478 0 99.68 0.4099999999999909 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf256 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf257 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf258 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf259 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu 
conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf260 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf261 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf262 2.00016617632 0 97.980003 2.564995500000002 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf263 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf264 1.99590274244 0 98.099998 2.3850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf265 2.00016617632 0 98.080002 2.4149970000000067 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf266 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf267 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf268 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf269 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf270 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf271 1.73515484904 0 99.699997 0.39000300000000154 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 
tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf272 1.7756263212 0 99.260002 0.6449969999999965 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf273 1.51382277464 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf274 1.72017310656 0 99.559998 0.5300020000000046 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf275 1.75663902891 0 98.0 2.5349999999999966 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf276 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf277 1.78817265464 0 98.959999 1.0950015000000022 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf278 1.7756263212 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf279 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf280 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf281 1.78817265464 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf282 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf283 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf284 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf285 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf286 1.78817265464 0 99.660004 0.42999599999999705 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf287 1.77226558474 0 98.599998 1.6350029999999975 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf288 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf289 1.77226558474 0 99.059998 0.9450030000000069 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf290 1.522932631 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf291 1.72333900478 0 99.540001 0.549998999999994 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf292 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf293 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf294 1.7756263212 0 99.040001 0.974998499999991 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf295 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf296 1.99590274244 0 
98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf297 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf298 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf299 1.73515484904 0 99.580002 0.5099980000000045 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf300 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf301 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf302 1.7756263212 0 97.760002 2.8949969999999965 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf303 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf304 1.77226558474 0 99.5 0.5899999999999977 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf305 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf306 1.78817265464 0 98.82 1.3050000000000068 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf307 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf308 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv 
perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf309 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf310 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf311 1.77226558474 0 99.279999 0.6150014999999911 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf312 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf313 1.78817265464 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf314 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf315 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf316 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf317 1.72017310656 0 99.580002 0.5099980000000045 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf318 1.7756263212 0 99.32 0.5550000000000068 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf319 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf320 1.72017310656 0 99.440002 0.6499979999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 
1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf321 1.7756263212 0 99.199997 0.7350045000000023 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf322 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf323 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf324 1.73515484904 0 99.5 0.5899999999999977 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf325 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf326 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf327 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf328 1.72333900478 0 99.660004 0.42999599999999705 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf329 1.78817265464 0 98.940002 1.1249969999999863 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf330 1.78817265464 0 99.18 0.7649999999999864 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf331 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf332 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ 
-conf333 1.78817265464 0 99.199997 0.7350045000000023 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf334 1.78817265464 0 99.32 0.5550000000000068 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf335 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf336 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf337 2.00016617632 0 98.059998 2.445003000000007 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf338 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf339 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf340 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf341 2.00016617632 0 98.0 2.5349999999999966 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf342 1.75663902891 0 99.160004 0.7949939999999955 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf343 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf344 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf345 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh 
fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf346 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf347 1.73515484904 0 99.660004 0.42999599999999705 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf348 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf349 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf350 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf351 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf352 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf353 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf354 1.72333900478 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf355 1.73515484904 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf356 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf357 1.70544786131 0 99.620003 0.4699970000000008 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf358 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf359 1.7756263212 0 98.82 1.3050000000000068 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf360 1.7756263212 0 99.379997 0.46500449999999205 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf361 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf362 1.78817265464 0 99.279999 0.6150014999999911 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf363 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf364 1.70544786131 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf365 1.78817265464 0 97.760002 2.8949969999999965 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf366 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf367 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf368 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf369 1.7756263212 0 99.160004 0.7949939999999955 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 
gpu softmax fp32 1 ------ -+++++ -conf370 1.78817265464 0 99.080002 0.9149970000000067 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf371 1.75663902891 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf372 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf373 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf374 1.72333900478 0 99.599998 0.4900019999999984 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf375 1.99590274244 0 97.940002 2.6249969999999863 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf376 1.78817265464 0 98.980003 1.064995500000002 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf377 1.7756263212 0 99.279999 0.6150014999999911 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf378 1.522932631 0 99.540001 0.549998999999994 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf379 1.5 0 99.699997 0.39000300000000154 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf380 1.78817265464 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf381 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf382 2.01610051566 0 98.120003 2.354995500000001 -1 
gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf383 1.72017310656 0 99.300003 0.5849954999999909 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf384 1.70544786131 0 99.639999 0.45000099999999466 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf385 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf386 1.73515484904 0 99.639999 0.45000099999999466 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf387 1.72017310656 0 99.620003 0.4699970000000008 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf388 1.51382277464 0 99.660004 0.42999599999999705 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf389 1.7756263212 0 99.300003 0.5849954999999909 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf390 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf391 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf392 1.72333900478 0 99.620003 0.4699970000000008 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf393 1.51382277464 0 99.620003 0.4699970000000008 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf394 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 
add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf395 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf396 1.73515484904 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf397 1.77226558474 0 99.32 0.5550000000000068 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf398 1.75663902891 0 99.32 0.5550000000000068 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf399 1.77226558474 0 98.980003 1.064995500000002 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf400 2.00016617632 0 98.040001 2.474998499999991 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf401 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf402 1.78817265464 0 99.300003 0.5849954999999909 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf403 1.75663902891 0 99.199997 0.7350045000000023 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf404 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf405 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf406 1.7756263212 0 98.400002 1.9349969999999956 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf407 1.73515484904 0 99.559998 0.5300020000000046 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf408 1.72333900478 0 99.580002 0.5099980000000045 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf409 1.72333900478 0 99.639999 0.45000099999999466 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf410 1.73515484904 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt deleted file mode 100644 index 707fd70be086b8961875c2cfd94ba1f41d2ac208..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ /dev/null @@ -1,904 +0,0 @@ -+++++ -conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu mul fp32 1 add fp32 1 tanh fp32 1 -4 gpu mul fp32 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf1 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf2 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf3 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf4 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf5 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf6 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf7 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf8 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf9 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf10 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf11 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf12 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf13 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf14 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf15 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf16 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf17 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf18 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu 
softmax fp32 1 ------ -+++++ -conf19 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf20 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf21 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf22 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf23 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf24 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf25 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf26 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf27 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf28 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf29 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf30 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf31 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv 
samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf32 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf33 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf34 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf35 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf36 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf37 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf38 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf39 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf40 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf41 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf42 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf43 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 
1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf44 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf45 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf46 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf47 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf48 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf49 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf50 2.00016617632 0 98.400002 1.9349969999999956 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf51 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf52 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf53 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf54 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf55 1.97610564729 0 99.599998 0.4900019999999984 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add 
fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf56 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf57 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf58 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf59 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf60 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf61 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf62 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf63 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf64 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf65 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf66 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf67 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf68 2.00016617632 0 
99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf69 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf70 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf71 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf72 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf73 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf74 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf75 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf76 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf77 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf78 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf79 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf80 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 
gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf81 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf82 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf83 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf84 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf85 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf86 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf87 1.99590274244 0 98.519997 1.7550044999999912 -1 gpu conv samp 35 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf88 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf89 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf90 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf91 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf92 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf93 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf94 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf95 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf96 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf97 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf98 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf99 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf100 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf101 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf102 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf103 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf104 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf105 
2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf106 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf107 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 21 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf108 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf109 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 31 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf110 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf111 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf112 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 tanh fp16 1 pool_max fp16 1 -2 gpu conv perf 27 add fp16 1 tanh fp16 1 pool_max fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt deleted file mode 100644 index db97ae4b9d5abaeced8b09a98b74126185466af7..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ /dev/null @@ -1,13968 +0,0 @@ -+++++ -conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp32 1 add fp32 1 tanh fp32 1 -4 gpu mul fp32 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf1 3.86059861244 0 99.5865002 0.5034997999999945 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf2 2.47778695782 0 99.401499275 0.6885007249999916 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf3 2.50228643329 0 99.302000275 0.5819995875000004 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf4 3.92040413524 0 99.545499375 0.5445006249999921 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf5 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf6 2.47778695782 0 99.04700025 0.964499625000002 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf7 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf8 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf9 3.77195447337 0 99.631001025 0.4589989749999944 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf10 2.47778695782 0 99.2549994 0.6525008999999926 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf11 6.14799414721 0 99.50250035 0.5874996499999924 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf12 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf13 2.47778695782 0 99.38799915 0.4530012749999983 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf14 3.84474688915 0 99.5614995 0.5285005000000013 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf15 3.34244261096 0 99.594499925 0.4955000750000039 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf16 
2.50228643329 0 99.4655008 0.6244991999999968 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf17 2.47778695782 0 98.991999975 1.0470000374999984 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf18 6.14799414721 0 99.41749905 0.6725009499999942 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf19 3.34244261096 0 99.51449975 0.5755002499999989 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf20 3.92040413524 0 99.5669999 0.5230000999999987 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf21 3.80166404425 0 99.543499725 0.546500274999994 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf22 3.85964385182 0 99.4755007 0.6144993 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf23 2.47778695782 0 99.31599945 0.5610008249999865 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf24 2.47778695782 0 99.405499825 0.6845001749999909 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf25 2.50228643329 0 99.37349895 0.4747515749999991 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf26 6.61857279171 0 99.494500325 0.5954996749999936 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf27 3.84474688915 0 99.417499625 0.6725003749999928 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf28 2.47778695782 0 99.026998925 0.9945016124999952 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf29 2.50228643329 0 99.370499525 0.4792507124999972 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise 
swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf30 6.30106886729 0 99.5040001 0.5859998999999988 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf31 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf32 3.7862916372 0 99.5900006 0.49999940000000154 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf33 3.86059861244 0 99.612000375 0.4779996250000039 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf34 3.92040413524 0 99.558499875 0.5315001250000023 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf35 3.86059861244 0 99.56849965 0.521500349999991 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf36 5.02870270579 0 99.267000425 0.6344993624999873 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf37 6.14799414721 0 99.50099985 0.5890001499999983 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf38 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf39 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf40 2.50228643329 0 99.027499025 0.9937514625000006 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf41 6.14799414721 0 99.554999675 0.535000324999993 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf42 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf43 3.84474688915 0 99.7254995 0.3645004999999998 
-1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf44 3.92040413524 0 99.57699975 0.5130002499999989 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf45 6.14799414721 0 99.5009999 0.5890001000000012 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf46 2.47778695782 0 99.376999825 0.46950026249999866 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf47 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf48 3.85964385182 0 99.269500375 0.6307494374999862 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf49 3.13161472572 0 99.5865002 0.5034997999999945 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf50 6.30106886729 0 99.25849995 0.6472500749999952 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf51 5.92620561097 0 99.54949965 0.5405003499999964 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf52 2.47124761202 0 99.10999975 0.8700003749999965 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf53 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf54 3.38717868509 0 99.33499915 0.5325012749999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf55 3.80166404425 0 99.513499725 0.5765002749999951 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf56 2.44096937877 0 99.3519992 0.5070012000000048 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf57 3.92040413524 0 99.475999775 0.6140002249999924 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf58 3.92040413524 0 99.3189995 0.5565007499999908 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf59 2.47778695782 0 99.479500975 0.6104990250000043 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf60 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf61 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf62 2.81322619695 0 99.540999075 0.5490009249999958 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf63 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf64 3.86059861244 0 99.615501 0.474499000000003 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf65 3.86059861244 0 99.585999975 0.5040000250000048 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf66 3.86059861244 0 99.515000025 0.5749999749999916 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf67 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf68 5.79060658268 0 99.425999975 0.6640000250000014 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf69 2.50228643329 0 98.901499925 1.1827501125000026 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf70 2.47124761202 0 98.265500075 
2.136749887499988 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf71 3.77195447337 0 99.63050095 0.45949904999999946 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf72 2.50228643329 0 98.91149985 1.167750224999999 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf73 5.02870270579 0 99.46900055 0.6209994499999937 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf74 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf75 2.57685599488 0 99.371499325 0.4777510124999935 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf76 2.51187737029 0 99.390999175 0.6990008249999932 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf77 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf78 6.14799414721 0 99.49949955 0.5905004500000018 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf79 2.47124761202 0 99.21099945 0.7185008249999925 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf80 6.30106886729 0 99.517499475 0.5725005250000038 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf81 5.92620561097 0 99.542999375 0.547000625000004 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf82 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf83 3.33055390722 0 99.418500225 0.6714997749999952 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf84 6.61857279171 0 99.50099985 0.5890001499999983 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf85 3.80166404425 0 99.521499575 0.5685004249999907 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf86 4.4071692756 0 99.419500275 0.6704997249999934 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf87 2.47124761202 0 99.37749925 0.46875112499999716 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf88 5.79060658268 0 99.565499625 0.5245003749999967 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf89 6.7963162944 0 99.453000325 0.6369996749999928 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf90 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf91 6.14799414721 0 99.43249995 0.6575000500000044 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf92 3.85964385182 0 99.473000375 0.6169996249999997 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf93 2.50228643329 0 98.878998975 1.216501537499994 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf94 5.02870270579 0 99.442000525 0.6479994750000003 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf95 6.57211871555 0 99.33249975 0.5362503750000016 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf96 3.38717868509 0 99.336998575 0.5295021375000033 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf97 3.92040413524 0 99.5710002 0.5189997999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf98 4.64385542353 0 99.51799975 0.5720002499999964 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf99 2.47124761202 0 99.01849985 1.007250225 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf100 2.5439518228 0 99.4895 0.600499999999991 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf101 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf102 6.10789096832 0 99.341499625 0.5227505625000006 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf103 2.50228643329 0 99.438500325 0.651499674999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf104 2.50228643329 0 99.4235 0.6664999999999935 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf105 2.50228643329 0 99.00049975 1.034250374999992 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf106 6.10789096832 0 99.590500925 0.49949907499999713 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf107 3.80166404425 0 99.5274999 0.5625001000000026 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf108 6.7963162944 0 99.495500375 0.5944996249999918 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf109 6.14799414721 0 99.495000225 0.5949997749999995 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf110 3.86059861244 0 99.5875 0.5024999999999921 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf111 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf112 4.90489779833 0 99.4235004 0.6664996000000031 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf113 4.93072604433 0 99.519499875 0.5705001250000038 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf114 2.47124761202 0 99.22599905 0.6960014249999986 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf115 2.57685599488 0 99.38249875 0.4612518750000021 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf116 2.47778695782 0 99.387499325 0.4537510125000068 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf117 3.92040413524 0 99.4889999 0.6010001000000017 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf118 2.47124761202 0 99.3574979 0.49875314999999887 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf119 4.90489779833 0 99.338499075 0.5272513874999945 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf120 2.57685599488 0 98.909499725 1.1707504124999915 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf121 3.7862916372 0 99.425500575 0.6644994249999968 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf122 5.02870270579 0 99.51799975 0.5720002499999964 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf123 6.10789096832 0 99.41449975 0.6755002499999933 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf124 3.80166404425 0 99.5164998 0.5735001999999924 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf125 
6.30106886729 0 99.469500325 0.6204996749999993 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf126 4.93072604433 0 99.522999375 0.567000625 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf127 3.86059861244 0 99.618499975 0.4715000249999918 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf128 2.55088214386 0 99.382999475 0.46050078749998846 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf129 6.14799414721 0 99.513499725 0.5765002749999951 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf130 6.30106886729 0 99.449999925 0.6400000749999976 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf131 2.50228643329 0 98.300999875 2.083500187499993 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf132 3.33055390722 0 99.563499475 0.5265005249999973 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf133 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf134 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf135 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf136 3.85964385182 0 99.47600045 0.613999549999997 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf137 2.78229733114 0 99.5514998 0.5385001999999958 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf138 3.77195447337 0 99.3254996 0.5467505999999958 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf139 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf140 2.81322619695 0 99.5814995 0.508500499999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf141 2.57685599488 0 99.417499875 0.6725001249999935 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf142 4.93072604433 0 99.5164998 0.5735001999999924 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf143 2.50228643329 0 99.466500825 0.6234991750000006 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf144 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf145 2.77405457184 0 99.573499525 0.5165004749999952 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf146 3.70186719231 0 99.722999925 0.3670000750000014 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf147 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf148 3.92040413524 0 99.5589999 0.5310000999999943 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf149 6.61857279171 0 99.414999925 0.6750000749999941 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf150 3.38717868509 0 99.56849935 0.5215006500000016 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf151 3.34244261096 0 99.616000475 0.47399952499999076 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ 
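(Aside on the file format, since the listing above and below is otherwise opaque: every block in these deleted tuner files is delimited by `+++++` and `-----`; the header line carries a configuration name followed by four numeric fields, which by the usual HPVM autotuner layout are speedup, energy, accuracy, and accuracy loss — that field interpretation is an assumption here, not stated in the files. Each subsequent line describes one network layer: a layer index, a compute target (`gpu` or `promise`), and operator/knob tokens (`fp32`/`fp16` with a flag, `perf <knob>`, `samp <knob>`, or `swing_level <n>` for promise layers). The following is a minimal parsing sketch over the raw file contents (i.e., without the diff's leading `-`); `Config` and `parse_tuner_confs` are illustrative names, not part of the deleted files.

    from dataclasses import dataclass, field
    from typing import List, Tuple

    @dataclass
    class Config:
        name: str
        speedup: float        # assumed meaning of field 1
        energy: float         # assumed meaning of field 2
        accuracy: float       # assumed meaning of field 3
        accuracy_loss: float  # assumed meaning of field 4
        # one entry per layer: (layer index, target, raw knob tokens)
        layers: List[Tuple[int, str, List[str]]] = field(default_factory=list)

    def parse_tuner_confs(text: str) -> List[Config]:
        configs, cur = [], None
        for line in text.splitlines():
            line = line.strip()
            if line == "+++++":            # block opener: header line follows
                cur = None
            elif line == "-----":          # block terminator: commit the config
                if cur is not None:
                    configs.append(cur)
                    cur = None
            elif cur is None and line:     # header: name + four numeric fields
                name, *nums = line.split()
                cur = Config(name, *map(float, nums))
            elif line:                     # layer line: index, target, knobs
                idx, target, *knobs = line.split()
                cur.layers.append((int(idx), target, knobs))
        return configs
)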
-+++++ -conf152 5.33920664205 0 99.4435006 0.6464994000000047 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf153 2.5439518228 0 98.314999825 2.062500262499995 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf154 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf155 2.50228643329 0 98.911000225 1.168499662500004 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf156 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf157 2.50228643329 0 99.39599955 0.6940004499999987 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf158 3.38717868509 0 99.46750085 0.6224991500000044 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf159 2.81322619695 0 99.461000775 0.6289992249999955 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf160 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf161 2.50228643329 0 99.007999825 1.0230002624999983 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf162 3.92040413524 0 99.5534999 0.5365000999999922 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf163 2.50228643329 0 99.43850055 0.6514994499999972 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf164 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ 
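(Each parsed header is one point in a speedup versus accuracy-loss trade-off, which is why the same network appears hundreds of times with different knob settings. A typical use of such a listing is to pick the fastest configuration whose reported loss stays under a budget; the helper below builds on the parsing sketch above, and both `best_under_budget` and the 0.6-point budget are illustrative, not anything prescribed by these files.

    def best_under_budget(configs, max_loss=0.6):
        """Return the highest-speedup Config with accuracy_loss <= max_loss."""
        eligible = [c for c in configs if c.accuracy_loss <= max_loss]
        return max(eligible, key=lambda c: c.speedup, default=None)
)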
-+++++ -conf165 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf166 6.30106886729 0 99.2590004 0.6464993999999891 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf167 2.50228643329 0 98.98099975 1.0635003750000038 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf168 2.50228643329 0 99.372998825 0.4755017625000022 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf169 5.02870270579 0 99.4364997 0.6535002999999989 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf170 2.50228643329 0 97.885499575 2.7067506375000008 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf171 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf172 2.50228643329 0 98.90100005 1.1834999250000067 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf173 2.50228643329 0 98.326998875 2.044501687499995 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf174 3.92040413524 0 99.4730004 0.6169995999999941 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf175 5.02870270579 0 99.25650025 0.6502496249999936 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf176 2.50228643329 0 99.40049935 0.6895006499999937 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf177 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf178 2.50228643329 0 99.375999275 
0.47100108749999947 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf179 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf180 2.81322619695 0 99.341498825 0.5227517624999933 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf181 6.30106886729 0 99.451500425 0.6384995749999917 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf182 2.50228643329 0 99.094000075 0.8939998875000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf183 3.92040413524 0 99.31999995 0.5550000750000024 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf184 2.50228643329 0 99.373498975 0.47475153749999066 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf185 2.50228643329 0 99.47200125 0.6179987499999925 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf186 3.92040413524 0 99.582000275 0.5079997249999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf187 3.85964385182 0 99.5244993 0.5655006999999955 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf188 6.30106886729 0 99.459000325 0.6309996749999925 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf189 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf190 6.30106886729 0 99.472500875 0.6174991250000034 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf191 2.50228643329 0 97.651000575 3.0584991375 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 
tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf192 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf193 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf194 3.92040413524 0 99.551 0.5389999999999958 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf195 2.50228643329 0 99.423000075 0.6669999249999933 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf196 3.85964385182 0 99.475500725 0.6144992749999943 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf197 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf198 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf199 6.30106886729 0 99.517000075 0.5729999249999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf200 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf201 3.85964385182 0 99.437000325 0.6529996749999981 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf202 6.30106886729 0 99.437500275 0.6524997249999928 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf203 2.50228643329 0 99.188499275 0.7522510874999995 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf204 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf205 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf206 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf207 6.30106886729 0 99.440500775 0.6494992249999939 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf208 2.47778695782 0 98.98650045 1.0552493250000055 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf209 3.92040413524 0 99.559499725 0.5305002750000029 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf210 6.14799414721 0 99.559999775 0.5300002250000034 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf211 4.93072604433 0 99.5089997 0.5810002999999938 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf212 4.90489779833 0 99.332999675 0.535500487500002 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf213 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf214 2.47124761202 0 99.316000575 0.5609991374999908 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf215 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf216 3.92040413524 0 99.5844996 0.5055003999999969 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf217 2.47124761202 0 99.1250007 0.847498949999995 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf218 
2.47124761202 0 99.381998825 0.4620017625000017 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf219 6.14799414721 0 99.42000025 0.6699997499999967 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf220 6.10789096832 0 99.416 0.6740000000000009 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf221 5.92620561097 0 99.5324998 0.5575002000000012 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf222 2.50228643329 0 99.3659991 0.4860013500000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf223 2.81322619695 0 99.59350015 0.49649985000000074 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf224 3.85964385182 0 99.272000525 0.626999212500003 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf225 6.14799414721 0 99.46450015 0.6254998499999914 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf226 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf227 6.14799414721 0 99.49450015 0.5954998500000045 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf228 2.81322619695 0 99.334998975 0.5325015374999893 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf229 6.14799414721 0 99.5029999 0.5870000999999917 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf230 3.86059861244 0 99.57099965 0.5190003499999932 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf231 6.30106886729 0 99.4249999 0.6650000999999947 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf232 
3.77195447337 0 99.64550045 0.44449954999999763 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf233 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf234 6.30106886729 0 99.455500425 0.634499575000001 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf235 2.44096937877 0 99.4455007 0.6444993000000011 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf236 5.02870270579 0 99.430500375 0.6594996250000037 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf237 2.77405457184 0 99.431500125 0.6584998749999983 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf238 6.30106886729 0 99.259000225 0.6464996625000055 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf239 3.92040413524 0 99.331999675 0.5370004874999879 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf240 3.34244261096 0 99.561499925 0.528500074999991 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf241 6.10789096832 0 99.344999375 0.5175009374999959 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf242 4.93072604433 0 99.524499625 0.5655003749999935 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf243 2.47778695782 0 99.0039994 1.0290008999999998 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf244 6.14799414721 0 99.501000025 0.5889999750000016 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf245 3.86059861244 0 99.590000225 0.4999997750000006 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu 
softmax fp16 1 ------ -+++++ -conf246 2.81322619695 0 99.585999875 0.5040001249999989 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf247 3.84474688915 0 99.570500075 0.5194999249999995 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf248 3.38717868509 0 99.3339993 0.5340010499999934 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf249 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf250 2.50228643329 0 99.2840001 0.6089998499999965 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf251 6.10789096832 0 99.57099985 0.5190001499999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf252 5.02870270579 0 99.26750015 0.6337497749999912 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf253 2.50228643329 0 99.388999175 0.45150123750000404 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf254 3.71567552873 0 99.558999425 0.5310005750000016 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf255 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf256 3.33055390722 0 99.567500075 0.5224999249999996 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf257 2.50228643329 0 99.387999625 0.4530005624999873 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf258 2.47778695782 0 99.325999725 0.5460004124999926 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf259 3.80166404425 0 99.533999275 0.5560007249999984 -1 gpu conv perf 30 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf260 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf261 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf262 2.47778695782 0 99.37499845 0.47250232499998646 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf263 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf264 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf265 4.78704248134 0 99.542999225 0.5470007749999951 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf266 3.86059861244 0 99.6060007 0.48399930000000213 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf267 3.86059861244 0 99.614001325 0.47599867499999393 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf268 3.86059861244 0 99.5959999 0.4940001000000024 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf269 5.02870270579 0 99.524999575 0.5650004250000024 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf270 3.34244261096 0 99.583499925 0.5065000749999996 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf271 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf272 3.92040413524 0 99.54199885 0.5480011499999989 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu 
mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf273 3.86059861244 0 99.582500175 0.5074998249999908 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf274 3.33055390722 0 99.724499675 0.36550032499999363 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf275 6.14799414721 0 99.504500025 0.5854999749999991 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf276 2.47124761202 0 98.819999675 1.3050004874999885 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf277 6.10789096832 0 99.40649895 0.6835010499999982 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf278 6.10789096832 0 99.577999475 0.5120005249999992 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf279 2.47778695782 0 99.30150025 0.5827496249999911 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf280 2.50228643329 0 99.3719988 0.4770017999999965 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf281 2.50228643329 0 98.29700015 2.089499774999993 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf282 4.93072604433 0 99.470500775 0.6194992249999928 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf283 2.50228643329 0 99.439000875 0.6509991249999928 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf284 6.14799414721 0 99.46549985 0.6245001499999973 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf285 3.85964385182 0 99.43350045 0.6564995500000009 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf286 6.30106886729 0 99.4350001 0.6549999000000014 -1 gpu conv perf 26 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf287 2.50228643329 0 98.327000375 2.0444994375000007 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf288 3.38717868509 0 99.326000275 0.545999587499999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf289 2.50228643329 0 99.420999875 0.669000124999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf290 2.50228643329 0 99.467501025 0.6224989749999935 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf291 3.92040413524 0 99.583000475 0.506999524999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf292 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf293 2.50228643329 0 99.38349915 0.4597512749999879 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf294 2.81322619695 0 99.4725008 0.6174991999999918 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf295 1.99590274244 0 99.540001 0.549998999999994 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf296 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf297 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf298 3.85964385182 0 99.4755009 0.6144990999999976 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf299 3.80166404425 0 99.524499125 0.5655008749999922 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf300 3.86059861244 0 99.5054997 0.5845002999999963 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf301 6.30106886729 0 99.45100085 0.638999149999998 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf302 5.02870270579 0 99.4765002 0.6134997999999939 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf303 2.47778695782 0 99.0105 1.0192500000000067 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf304 3.92040413524 0 99.562000025 0.5279999749999945 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf305 6.30106886729 0 99.50299955 0.5870004499999993 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf306 3.84474688915 0 99.729999675 0.3600003249999958 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf307 6.14799414721 0 99.5119996 0.578000400000002 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf308 4.90489779833 0 99.58749965 0.5025003499999997 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf309 3.7862916372 0 99.33799965 0.528000524999996 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf310 6.30106886729 0 99.2670004 0.6344993999999957 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf311 6.30106886729 0 99.435500625 0.6544993749999947 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf312 2.47778695782 0 99.331499275 0.5377510874999984 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf313 2.77405457184 0 99.580000425 0.5099995750000034 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf314 6.14799414721 0 99.54800025 0.5419997499999966 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf315 2.50228643329 0 98.917000175 1.1594997374999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf316 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf317 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf318 3.92040413524 0 99.55649935 0.533500650000002 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf319 2.47778695782 0 99.265 0.6374999999999957 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf320 2.47124761202 0 98.317499 2.0587514999999996 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf321 3.33055390722 0 99.427499975 0.6625000249999943 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf322 2.50228643329 0 99.00349965 1.0297505250000043 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf323 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf324 6.30106886729 0 99.4665007 0.6234993000000003 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf325 4.93072604433 0 99.518999775 0.5710002250000002 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf326 5.92620561097 0 99.542499525 0.5475004750000011 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf327 
3.86059861244 0 99.55699975 0.533000249999995 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf328 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf329 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf330 2.50228643329 0 99.3734996 0.47475059999999303 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf331 6.14799414721 0 99.50849975 0.5815002499999992 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf332 2.81322619695 0 99.33399895 0.5340015750000049 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf333 2.50228643329 0 99.375999975 0.47100003749999786 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf334 3.92040413524 0 99.546999 0.5430009999999982 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf335 6.30106886729 0 99.43900055 0.6509994499999948 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf336 3.85964385182 0 99.4830001 0.6069998999999996 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf337 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf338 5.02870270579 0 99.470500275 0.6194997249999915 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf339 2.50228643329 0 98.750500325 1.40924951249999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf340 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv 
samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf341 2.81322619695 0 99.540999625 0.549000375 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf342 5.02870270579 0 99.518000275 0.5719997249999921 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf343 6.30106886729 0 99.462500575 0.6274994249999907 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf344 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf345 5.02870270579 0 99.259499975 0.6457500375000009 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf346 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf347 2.50228643329 0 98.88849885 1.2022517249999893 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf348 2.50228643329 0 99.401999825 0.6880001749999934 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf349 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf350 2.50228643329 0 98.31600045 2.0609993249999903 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf351 3.85964385182 0 99.441 0.6489999999999952 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf352 2.50228643329 0 99.0334995 0.9847507499999892 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf353 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 
tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf354 2.50228643329 0 99.443500275 0.6464997249999925 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf355 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf356 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf357 2.81322619695 0 99.57849965 0.51150035 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf358 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf359 2.50228643329 0 99.42499965 0.665000349999994 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf360 2.50228643329 0 98.90749945 1.173750824999992 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf361 3.38717868509 0 99.3249995 0.5475007499999904 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf362 2.50228643329 0 99.01050035 1.0192494749999952 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf363 2.50228643329 0 99.37949865 0.46575202499999335 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf364 2.50228643329 0 98.987000375 1.0544994375000059 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf365 2.50228643329 0 99.467501025 0.6224989749999935 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf366 3.38717868509 0 99.533499175 0.5565008249999949 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf367 5.02870270579 0 99.465000275 0.6249997250000036 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf368 2.50228643329 0 98.8994997 1.1857504499999862 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf369 2.50228643329 0 99.095000075 0.8924998874999943 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf370 6.30106886729 0 99.465499825 0.6245001750000029 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf371 5.02870270579 0 99.480000725 0.6099992750000013 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf372 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf373 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf374 3.85964385182 0 99.462000625 0.627999374999996 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf375 3.38717868509 0 99.583499875 0.5065001249999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf376 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf377 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf378 6.30106886729 0 99.504999975 0.5850000249999937 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf379 6.30106886729 0 99.258500625 0.647249062500002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf380 
2.50228643329 0 99.290000375 0.5999994374999886 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf381 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf382 5.02870270579 0 99.439500025 0.6504999749999968 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf383 2.50228643329 0 97.8925002 2.6962496999999956 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf384 3.38717868509 0 99.55950045 0.5304995499999962 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf385 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf386 6.30106886729 0 99.435499425 0.6545005749999945 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf387 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf388 2.47124761202 0 99.21699935 0.7095009750000045 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf389 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf390 6.14799414721 0 99.512499525 0.5775004750000022 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf391 6.14799414721 0 99.496000075 0.593999925 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf392 6.30106886729 0 99.256499475 0.6502507874999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf393 4.90489779833 0 99.41399975 0.6760002499999956 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf394 2.50228643329 0 99.37949885 0.46575172499998985 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf395 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf396 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf397 6.10789096832 0 99.40799955 0.6820004499999982 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf398 4.93072604433 0 99.4785008 0.6114991999999916 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf399 2.47778695782 0 99.044499725 0.9682504125000051 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf400 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf401 3.92040413524 0 99.549000125 0.5409998749999915 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf402 4.78704248134 0 99.557500025 0.5324999750000018 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf403 3.92040413524 0 99.339499575 0.5257506374999892 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf404 3.77195447337 0 99.64400015 0.4459998500000012 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf405 2.78229733114 0 99.515498725 0.5745012749999973 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf406 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add 
fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf407 3.38717868509 0 99.539999575 0.5500004250000018 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf408 2.50228643329 0 99.28500105 0.6074984249999886 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf409 6.30106886729 0 99.4274997 0.6625002999999993 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf410 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf411 3.86059861244 0 99.61500045 0.47499955000000116 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf412 3.7862916372 0 99.422500375 0.6674996249999993 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf413 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf414 2.81322619695 0 99.592500325 0.4974996749999946 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf415 6.10789096832 0 99.3419997 0.5220004499999931 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf416 6.14799414721 0 99.461500375 0.6284996249999978 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf417 3.85964385182 0 99.4770005 0.6129994999999951 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf418 6.14799414721 0 99.508999875 0.5810001249999971 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf419 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf420 2.50228643329 0 99.3769985 0.4695022499999979 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv 
samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf421 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf422 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf423 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf424 6.30106886729 0 99.4700003 0.6199997000000025 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf425 6.30106886729 0 99.45350025 0.636499749999993 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf426 3.84474688915 0 99.56299955 0.5270004499999971 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf427 2.81322619695 0 99.578000175 0.5119998249999981 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf428 2.47124761202 0 99.12450055 0.8482491750000065 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf429 4.90489779833 0 99.5894995 0.5005004999999955 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf430 2.47778695782 0 99.179000625 0.7664990624999959 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf431 3.86059861244 0 99.49899945 0.5910005499999983 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf432 2.50228643329 0 99.2835007 0.6097489499999895 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf433 3.38717868509 0 99.567999775 0.5220002249999937 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 
tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf434 6.14799414721 0 99.548998875 0.5410011250000025 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf435 2.77405457184 0 99.736499925 0.3535000749999938 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf436 4.93072604433 0 99.5544993 0.5355006999999944 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf437 4.90489779833 0 99.329499025 0.5407514624999905 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf438 5.02870270579 0 99.475999575 0.6140004249999947 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf439 6.30106886729 0 99.5009997 0.5890003000000036 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf440 3.80166404425 0 99.50849925 0.5815007499999979 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf441 6.30106886729 0 99.434000275 0.6559997249999953 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf442 6.14799414721 0 99.41999995 0.6700000499999931 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf443 3.85964385182 0 99.47000065 0.6199993499999948 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf444 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf445 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf446 6.14799414721 0 99.50449985 0.5855001499999958 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf447 3.38717868509 0 99.582499575 0.5075004249999978 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 
------ -+++++ -conf448 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf449 3.38717868509 0 99.543499525 0.5465004749999963 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf450 2.00016617632 0 98.220001 2.204998500000002 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf451 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf452 6.30106886729 0 99.442500275 0.6474997249999973 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf453 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf454 2.50228643329 0 99.292000475 0.5969992875000045 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf455 2.44096937877 0 99.3534991 0.5047513500000065 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf456 2.50228643329 0 98.90349965 1.1797505249999958 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf457 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf458 2.47778695782 0 99.26350005 0.6397499249999896 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf459 4.93072604433 0 99.5080002 0.5819997999999998 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf460 2.81322619695 0 99.578999925 0.5110000749999927 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 
------ -+++++ -conf461 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf462 3.92040413524 0 99.556999575 0.5330004249999917 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf463 2.50228643329 0 99.371499375 0.477750937499998 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf464 6.14799414721 0 99.41999945 0.6700005499999918 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf465 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf466 2.47124761202 0 98.272500275 2.1262495874999985 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf467 3.85964385182 0 99.4570007 0.632999300000003 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf468 6.10789096832 0 99.412499925 0.6775000749999919 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf469 6.30106886729 0 99.4435008 0.6464992000000024 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf470 2.47124761202 0 98.813000175 1.315499737499998 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf471 3.86059861244 0 99.49750055 0.5924994499999997 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf472 3.92040413524 0 99.553499375 0.5365006249999965 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf473 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf474 2.50228643329 0 99.40199945 0.6880005499999925 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 
tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf475 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf476 6.14799414721 0 99.4554998 0.6345001999999994 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf477 2.50228643329 0 99.3669987 0.48450195000000207 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf478 2.77405457184 0 99.7349998 0.35500020000000065 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf479 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf480 2.50228643329 0 99.00699965 1.0245005250000006 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf481 6.14799414721 0 99.540999725 0.5490002749999917 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf482 3.33055390722 0 99.7384998 0.35150019999999815 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf483 3.7862916372 0 99.594000025 0.49599997499999804 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf484 6.10789096832 0 99.580000225 0.5099997749999915 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf485 2.47124761202 0 99.50349965 0.5865003500000029 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf486 2.50228643329 0 99.19349965 0.7447505249999864 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf487 6.30106886729 0 99.457000125 0.6329998750000044 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 
gpu softmax fp16 1 ------ -+++++ -conf488 3.38717868509 0 99.471999825 0.6180001750000003 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf489 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf490 3.86059861244 0 99.607000275 0.48299972499999344 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf491 6.10789096832 0 99.334499875 0.5332501874999878 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf492 3.80166404425 0 99.413499775 0.6765002249999924 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf493 5.02870270579 0 99.448500525 0.6414994749999977 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf494 2.47124761202 0 99.21199905 0.7170014249999923 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf495 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf496 6.14799414721 0 99.503000075 0.586999924999995 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf497 2.50228643329 0 99.098501075 0.8872483874999944 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf498 5.02870270579 0 99.274000025 0.6239999624999868 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf499 4.90489779833 0 99.58150035 0.5084996499999989 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf500 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf501 3.85964385182 0 99.44050065 0.6494993499999936 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf502 2.50228643329 0 99.362999025 0.49050146250000637 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf503 3.86059861244 0 99.607500325 0.482499674999994 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf504 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf505 5.02870270579 0 99.439999975 0.6500000249999914 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf506 2.47778695782 0 99.422999625 0.667000374999995 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf507 3.80166404425 0 99.517499625 0.5725003749999985 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf508 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf509 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf510 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf511 2.50228643329 0 99.437500525 0.6524994749999934 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf512 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf513 6.30106886729 0 99.505500025 0.5844999749999943 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf514 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf515 5.02870270579 0 99.473500475 0.6164995250000033 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf516 3.38717868509 0 99.340999725 0.5235004124999918 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf517 3.92040413524 0 99.55600015 0.5339998499999951 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf518 2.81322619695 0 99.582499875 0.5075001250000014 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf519 5.02870270579 0 99.434999875 0.6550001249999952 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf520 5.02870270579 0 99.471500325 0.6184996750000039 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf521 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf522 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf523 3.38717868509 0 99.560999675 0.5290003249999927 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf524 3.85964385182 0 99.438000125 0.6519998749999957 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf525 2.50228643329 0 99.382499475 0.46125078749999204 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf526 2.50228643329 0 99.464500775 0.625499224999993 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf527 3.38717868509 0 99.577499825 0.5125001749999939 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf528 2.50228643329 0 98.99950025 
1.0357496250000011 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf529 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf530 2.50228643329 0 99.387999225 0.45300116249999434 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf531 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf532 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf533 2.50228643329 0 99.0314993 0.9877510500000071 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf534 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf535 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf536 2.50228643329 0 98.74550055 1.4167491749999925 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf537 6.30106886729 0 99.462 0.6279999999999945 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf538 3.92040413524 0 99.32449935 0.548250975000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf539 3.85964385182 0 99.470000825 0.6199991749999981 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf540 2.50228643329 0 99.37150005 0.4777499250000048 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf541 6.30106886729 0 
99.44350025 0.6464997499999982 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf542 6.30106886729 0 99.456000225 0.6339997750000009 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf543 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf544 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf545 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf546 5.02870270579 0 99.516499975 0.5735000249999956 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf547 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf548 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf549 3.38717868509 0 99.546999725 0.5430002749999915 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf550 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf551 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf552 3.92040413524 0 99.542999425 0.5470005749999928 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf553 3.38717868509 0 99.472000875 0.6179991249999915 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf554 2.50228643329 0 99.0099999 1.0200001500000013 
-1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf555 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf556 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf557 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf558 3.85964385182 0 99.272500475 0.626249287499995 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf559 6.30106886729 0 99.43700055 0.6529994500000044 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf560 2.50228643329 0 98.316000025 2.0609999625000057 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf561 3.38717868509 0 99.5814995 0.508500499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf562 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf563 2.50228643329 0 97.960999825 2.593500262499994 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf564 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf565 6.30106886729 0 99.276000625 0.6209990625000046 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf566 2.50228643329 0 98.91400005 1.1639999249999988 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf567 3.92040413524 0 99.577500375 0.5124996249999981 -1 gpu conv perf 21 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf568 2.50228643329 0 99.423500325 0.6664996749999915 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf569 3.85964385182 0 99.477500425 0.6124995749999954 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf570 2.47778695782 0 99.413500025 0.676499974999993 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf571 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf572 6.14799414721 0 99.504500025 0.5854999749999991 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf573 3.84474688915 0 99.570000775 0.5199992250000008 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf574 2.47778695782 0 99.013500075 1.0147498875000025 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf575 2.47124761202 0 99.385499425 0.4567508624999874 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf576 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf577 2.81322619695 0 99.545499275 0.5445007250000004 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf578 5.02870270579 0 99.44550065 0.6444993499999981 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf579 2.50228643329 0 99.1870008 0.7544987999999861 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf580 2.78229733114 0 99.51649875 0.5735012500000011 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add 
fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf581 5.92620561097 0 99.553999775 0.5360002250000037 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf582 2.50228643329 0 99.368498525 0.4822522124999864 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf583 3.38717868509 0 99.547499275 0.5425007249999908 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf584 2.78229733114 0 99.57900015 0.5109998499999989 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf585 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf586 2.50228643329 0 99.375499 0.4717514999999892 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf587 2.81322619695 0 99.47250045 0.6174995499999995 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf588 2.50228643329 0 99.28450095 0.6082485750000046 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf589 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf590 6.14799414721 0 99.5135004 0.5764995999999997 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf591 6.14799414721 0 99.4965 0.5935000000000002 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf592 3.34244261096 0 99.561499825 0.5285001749999992 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf593 2.50228643329 0 99.029999175 0.990001237499996 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf594 
6.30106886729 0 99.442000175 0.6479998249999938 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf595 2.47778695782 0 99.453499975 0.636500024999998 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf596 3.92040413524 0 99.56400005 0.5259999499999936 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf597 6.10789096832 0 99.339999275 0.5250010875000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf598 2.47124761202 0 99.3599983 0.49500254999999527 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf599 2.50228643329 0 99.469500725 0.6204992749999946 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf600 2.81322619695 0 99.589500425 0.5004995750000006 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf601 6.30106886729 0 99.2495001 0.6607498499999878 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf602 3.80166404425 0 99.522498725 0.5675012749999923 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf603 2.77405457184 0 99.742999825 0.34700017499999947 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf604 6.14799414721 0 99.5524999 0.5375000999999969 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf605 6.30106886729 0 99.446000725 0.6439992749999931 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf606 4.93072604433 0 99.47500045 0.6149995500000017 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf607 3.38717868509 0 99.574000075 0.515999924999997 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ 
-conf608 3.33055390722 0 99.73199955 0.35800045000000014 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf609 3.92040413524 0 99.575000325 0.5149996749999929 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf610 4.90489779833 0 99.31799945 0.5580008249999935 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf611 6.30106886729 0 99.466000575 0.6239994250000024 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf612 6.14799414721 0 99.46900035 0.620999649999996 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf613 6.10789096832 0 99.4139997 0.6760002999999927 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf614 3.7862916372 0 99.3359989 0.5310016499999861 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf615 2.47778695782 0 99.5030006 0.5869994000000048 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf616 3.34244261096 0 99.577500025 0.5124999749999916 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf617 3.92040413524 0 99.5005001 0.5894999000000013 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf618 6.14799414721 0 99.420000025 0.6699999750000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf619 2.47778695782 0 99.4209994 0.6690005999999983 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf620 2.47124761202 0 99.224499975 0.6982500374999958 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf621 6.30106886729 0 99.428499925 0.6615000750000007 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ 
-conf622 3.85964385182 0 99.435500575 0.6544994249999917 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf623 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf624 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf625 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf626 3.84474688915 0 99.7199993 0.37000069999999996 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf627 3.86059861244 0 99.49699995 0.5930000499999949 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf628 3.92040413524 0 99.320000325 0.5549995125000038 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf629 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf630 3.85964385182 0 99.523999725 0.5660002750000018 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf631 1.97610564729 0 99.660004 0.42999599999999705 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf632 6.30106886729 0 99.512499875 0.5775001249999946 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf633 2.78229733114 0 99.576000025 0.5139999749999987 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf634 6.10789096832 0 99.57550015 0.5144998500000014 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf635 4.90489779833 0 99.329999825 0.5400002624999942 -1 gpu conv samp 34 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf636 3.34244261096 0 99.58599985 0.5040001500000045 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf637 1.97610564729 0 99.379997 0.46500449999999205 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf638 6.30106886729 0 99.455499575 0.6345004249999932 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf639 3.85964385182 0 99.263500425 0.639749362499991 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf640 6.14799414721 0 99.422499575 0.6675004249999944 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf641 2.47778695782 0 99.417500075 0.6724999249999911 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf642 6.14799414721 0 99.490500175 0.5994998250000038 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf643 2.50228643329 0 99.374999575 0.4725006374999907 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf644 2.50228643329 0 99.011500375 1.017749437500001 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf645 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf646 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf647 2.47778695782 0 99.503999925 0.5860000749999955 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf648 6.10789096832 0 99.33900025 0.5264996249999996 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf649 6.10789096832 0 99.4024996 0.687500399999999 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf650 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf651 2.50228643329 0 98.8884997 1.202250450000001 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf652 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf653 3.92040413524 0 99.47850045 0.6114995499999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf654 3.77195447337 0 99.641501475 0.4484985249999994 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf655 5.02870270579 0 99.2675 0.6337499999999991 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf656 6.30106886729 0 99.470500425 0.6194995750000004 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf657 2.47778695782 0 99.435000375 0.6549996249999964 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf658 6.30106886729 0 99.43799965 0.652000350000003 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf659 2.50228643329 0 98.3299994 2.0400008999999883 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf660 6.14799414721 0 99.508499725 0.5815002750000048 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf661 3.38717868509 0 99.33649845 0.5302523250000064 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf662 2.81322619695 0 99.5795002 0.5104997999999995 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf663 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf664 2.47124761202 0 99.494999875 0.5950001249999929 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf665 3.80166404425 0 99.47750075 0.6124992499999934 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf666 3.86059861244 0 99.58 0.5099999999999995 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf667 6.30106886729 0 99.441500025 0.6484999750000014 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf668 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf669 3.80166404425 0 99.525999075 0.5640009249999963 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf670 2.47124761202 0 99.323999 0.5490014999999957 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf671 2.50228643329 0 98.73550015 1.4317497750000072 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf672 6.30106886729 0 99.265999825 0.636000262500005 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf673 6.14799414721 0 99.4690007 0.6209993000000026 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf674 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf675 4.78704248134 0 99.554999925 0.5350000749999936 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf676 
6.14799414721 0 99.559499325 0.5305006749999933 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf677 2.50228643329 0 99.29100025 0.5984996250000023 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf678 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf679 2.50228643329 0 99.366998475 0.4845022874999927 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf680 2.50228643329 0 97.97399995 2.5740000749999865 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf681 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf682 3.92040413524 0 99.5735001 0.5164998999999938 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf683 2.50228643329 0 99.032999775 0.9855003375000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf684 2.50228643329 0 99.280000725 0.6149989125000062 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf685 2.50228643329 0 98.88899965 1.201500524999993 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf686 2.50228643329 0 99.195000075 0.7424998875000028 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf687 2.50228643329 0 99.018499825 1.007250262499987 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf688 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf689 3.85964385182 0 99.27300055 0.6254991749999874 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf690 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf691 6.30106886729 0 99.468499525 0.6215004749999992 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf692 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf693 3.85964385182 0 99.4565008 0.6334991999999972 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf694 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf695 6.30106886729 0 99.447499925 0.6425000749999953 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf696 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf697 6.30106886729 0 99.501999875 0.5880001250000021 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf698 5.02870270579 0 99.467500675 0.6224993250000012 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf699 2.50228643329 0 99.435499925 0.6545000749999957 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf700 2.50228643329 0 99.4214999 0.6685000999999972 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf701 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf702 3.85964385182 0 99.47450035 0.6154996499999982 -1 gpu conv perf 22 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf703 2.81322619695 0 99.4615009 0.6284990999999934 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf704 2.50228643329 0 98.3229992 2.0505011999999994 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf705 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf706 3.85964385182 0 99.436500175 0.6534998249999916 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf707 6.30106886729 0 99.43250055 0.6574994499999974 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf708 3.92040413524 0 99.559498975 0.530501025000001 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf709 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf710 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf711 2.50228643329 0 97.63800065 3.077999025000004 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf712 2.01610051566 0 97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf713 3.92040413524 0 99.318500375 0.5572494374999977 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf714 2.50228643329 0 98.90349935 1.1797509749999904 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf715 3.92040413524 0 99.5524996 0.5375003999999933 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu 
mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf716 6.30106886729 0 99.2610001 0.6434998499999907 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf717 6.30106886729 0 99.446500275 0.6434997249999924 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf718 3.38717868509 0 99.567999775 0.5220002249999937 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf719 2.50228643329 0 99.101000225 0.883499662499986 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf720 3.85964385182 0 99.478500775 0.6114992249999972 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf721 2.47778695782 0 99.39299885 0.6970011499999998 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf722 3.85964385182 0 99.528499175 0.5615008250000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf723 6.14799414721 0 99.460000125 0.6299998750000043 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf724 2.50228643329 0 99.46000075 0.6299992499999917 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf725 3.92040413524 0 99.332999525 0.5355007124999887 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf726 3.80166404425 0 99.570999825 0.5190001749999965 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf727 2.47778695782 0 99.303500625 0.5797490624999995 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf728 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf729 3.92040413524 0 99.4830003 0.6069996999999973 -1 gpu conv perf 26 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf730 6.30106886729 0 99.519000275 0.5709997250000015 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf731 5.02870270579 0 99.5234998 0.5665002000000016 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf732 3.86059861244 0 99.611500325 0.47849967500000334 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf733 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf734 2.50228643329 0 99.033498825 0.9847517625000037 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf735 3.38717868509 0 99.3329991 0.5355013500000041 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf736 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf737 6.14799414721 0 99.50600015 0.5839998499999922 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf738 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf739 3.34244261096 0 99.5624996 0.5275004000000024 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf740 2.47124761202 0 99.36199835 0.4920024750000067 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf741 6.14799414721 0 99.5104998 0.5795001999999926 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf742 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf743 2.81322619695 0 99.333499225 0.534751162500001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf744 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf745 3.38717868509 0 99.5830001 0.5069998999999911 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf746 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf747 2.81322619695 0 99.587500225 0.5024997749999983 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf748 3.85964385182 0 99.462500975 0.6274990250000002 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf749 6.30106886729 0 99.44100045 0.6489995499999935 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf750 3.86059861244 0 99.5044995 0.5855005000000034 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf751 6.10789096832 0 99.337499975 0.5287500374999965 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf752 3.92040413524 0 99.580999475 0.509000524999999 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf753 2.50228643329 0 99.394999575 0.6950004249999978 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf754 6.14799414721 0 99.420499375 0.6695006249999921 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf755 2.47778695782 0 99.374499425 0.47325086250000226 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf756 6.30106886729 0 99.43549985 0.6545001499999984 -1 gpu conv samp 32 add 
fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf757 2.50228643329 0 99.43300005 0.6569999499999938 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf758 3.84474688915 0 99.4280003 0.6619997000000041 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf759 5.92620561097 0 99.533499625 0.5565003749999932 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf760 6.10789096832 0 99.575999775 0.5140002249999981 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf761 6.10789096832 0 99.40649965 0.6835003499999971 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf762 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf763 6.14799414721 0 99.505500225 0.584499774999992 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf764 2.47778695782 0 99.322999075 0.5505013874999989 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf765 6.30106886729 0 99.4675 0.6224999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf766 4.90489779833 0 99.32749945 0.5437508249999894 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf767 3.85964385182 0 99.432499925 0.6575000749999959 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf768 6.10789096832 0 99.578999975 0.5110000249999956 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf769 4.93072604433 0 99.5179994 0.5720006000000041 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf770 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf771 3.86059861244 0 99.557999225 0.5320007749999945 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf772 2.78229733114 0 99.614500525 0.4754994750000009 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf773 5.02870270579 0 99.432000675 0.6579993250000001 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf774 4.93072604433 0 99.517499825 0.5725001749999962 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf775 2.47778695782 0 99.4125002 0.6774998000000011 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf776 3.77195447337 0 99.6390002 0.4509997999999996 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf777 2.81322619695 0 99.538499125 0.5515008749999964 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf778 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf779 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf780 3.92040413524 0 99.4845006 0.6054993999999937 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf781 6.30106886729 0 99.501500475 0.5884995249999975 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf782 2.77405457184 0 99.736499225 0.3535007749999949 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf783 3.84474688915 0 99.42049985 0.669500149999999 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 
------ -+++++ -conf784 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf785 2.50228643329 0 99.36849925 0.48225112499999767 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf786 6.30106886729 0 99.465499775 0.6245002249999999 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf787 3.85964385182 0 99.481500875 0.608499125000003 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf788 3.80166404425 0 99.42100005 0.6689999499999942 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf789 4.93072604433 0 99.511499675 0.5785003250000017 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf790 6.14799414721 0 99.5054997 0.5845002999999963 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf791 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf792 2.44096937877 0 99.4470006 0.6429994000000022 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf793 2.47778695782 0 98.260500175 2.1442497374999903 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf794 6.10789096832 0 99.33799975 0.5280003750000049 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf795 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf796 2.44096937877 0 99.353998575 0.5040021374999881 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf797 2.50228643329 0 99.42099945 0.6690005500000012 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 
1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf798 6.30106886729 0 99.442500625 0.6474993750000039 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf799 6.30106886729 0 99.428500225 0.6614997750000043 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf800 3.92040413524 0 99.56549965 0.5245003499999911 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf801 2.47124761202 0 99.185500075 0.7567498875000069 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf802 3.92040413524 0 99.53350015 0.556499850000003 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf803 6.14799414721 0 99.5049996 0.5850003999999928 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf804 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf805 2.00016617632 0 98.379997 1.965004499999992 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf806 6.14799414721 0 99.464499625 0.6255003749999958 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf807 1.99590274244 0 98.660004 1.5449939999999955 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf808 3.85964385182 0 99.53049925 0.5595007499999923 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf809 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf810 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 
1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf811 3.86059861244 0 99.591500125 0.4984998750000017 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf812 2.50228643329 0 99.382499425 0.4612508624999876 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf813 2.47124761202 0 98.818999725 1.3065004125000002 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf814 3.86059861244 0 99.6200006 0.4699994000000004 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf815 2.73595882486 0 99.6375013 0.45249870000000103 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf816 6.14799414721 0 99.545999725 0.5440002749999963 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf817 3.7862916372 0 99.3424988 0.5212517999999946 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf818 2.77405457184 0 99.4264998 0.6635001999999958 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf819 2.50228643329 0 99.0920006 0.896999099999988 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf820 5.02870270579 0 99.476000675 0.6139993250000032 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf821 3.92040413524 0 99.3214991 0.5527513500000012 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf822 2.47124761202 0 98.535999325 1.7310010124999877 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf823 2.78229733114 0 99.551999525 0.5380004749999984 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf824 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf825 3.85964385182 0 99.473999875 0.6160001249999937 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf826 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf827 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf828 6.30106886729 0 99.46050055 0.6294994499999916 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf829 2.50228643329 0 99.4435005 0.6464994999999988 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf830 2.81322619695 0 99.538499475 0.551500525000003 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf831 2.50228643329 0 98.230000275 2.1899995875000045 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf832 2.50228643329 0 99.466500825 0.6234991750000006 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf833 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf834 6.30106886729 0 99.4185003 0.6714996999999926 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf835 2.50228643329 0 98.889999625 1.2000005624999943 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf836 3.38717868509 0 99.478001025 0.6119989750000002 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf837 5.02870270579 0 99.467500025 0.622499974999991 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 
-4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf838 2.50228643329 0 99.389499275 0.45075108749998805 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf839 2.81322619695 0 99.472000525 0.6179994749999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf840 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf841 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf842 3.85964385182 0 99.52599925 0.5640007499999996 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf843 2.50228643329 0 99.29100025 0.5984996250000023 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf844 3.92040413524 0 99.5589993 0.5310007000000013 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf845 3.85964385182 0 99.47500025 0.6149997500000041 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf846 3.85964385182 0 99.469500475 0.620499524999994 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf847 3.92040413524 0 99.48150035 0.6084996499999932 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf848 3.92040413524 0 99.536 0.5539999999999964 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf849 2.50228643329 0 99.4420006 0.6479993999999977 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf850 2.50228643329 0 97.654500925 3.0532486125000062 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf851 6.30106886729 0 99.4430001 
0.6469998999999916 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf852 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf853 5.02870270579 0 99.458000775 0.6319992249999956 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf854 3.38717868509 0 99.329499675 0.5407504875000058 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf855 2.50228643329 0 98.3030002 2.080499699999997 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf856 3.92040413524 0 99.33299985 0.5355002250000069 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf857 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf858 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf859 3.38717868509 0 99.57400005 0.5159999500000026 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf860 3.92040413524 0 99.542999475 0.5470005249999957 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf861 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf862 6.30106886729 0 99.504999775 0.585000224999996 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf863 2.50228643329 0 99.096000725 0.8909989125000024 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf864 2.50228643329 0 99.368999175 0.48150123749999807 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf865 6.30106886729 0 99.24349955 0.6697506750000031 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf866 5.02870270579 0 99.48200025 0.6079997499999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf867 5.02870270579 0 99.43700025 0.6529997500000008 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf868 2.50228643329 0 99.370499525 0.4792507124999972 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf869 2.50228643329 0 99.41749965 0.6725003500000014 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf870 3.85964385182 0 99.431000575 0.658999424999999 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf871 6.30106886729 0 99.46350045 0.6264995499999998 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf872 5.02870270579 0 99.512999425 0.5770005749999939 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf873 5.02870270579 0 99.269500575 0.630749137500004 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf874 2.50228643329 0 99.180999675 0.7635004875000035 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf875 2.50228643329 0 99.409499525 0.6805004749999967 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf876 2.50228643329 0 98.9994999 1.0357501499999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf877 2.47778695782 0 99.42899995 0.6610000499999927 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf878 3.38717868509 0 99.483500225 
0.6064997749999975 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf879 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf880 3.33055390722 0 99.4320002 0.6579997999999933 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf881 3.33055390722 0 99.5704994 0.519500599999995 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf882 3.86059861244 0 99.610500025 0.4794999750000045 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf883 2.81322619695 0 99.577000375 0.5129996250000005 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf884 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf885 2.47778695782 0 99.40549945 0.6845005500000042 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf886 3.38717868509 0 99.569 0.5209999999999951 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf887 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf888 2.50228643329 0 99.43799995 0.6520000499999924 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf889 2.50228643329 0 99.473001075 0.6169989249999986 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf890 6.14799414721 0 99.555999325 0.5340006749999958 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf891 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf892 4.90489779833 0 99.4159999 0.674000099999995 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf893 4.90489779833 0 99.3299996 0.5400006000000062 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf894 4.93072604433 0 99.52499955 0.5650004499999938 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf895 3.86059861244 0 99.506999625 0.5830003749999918 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf896 3.86059861244 0 99.582499975 0.5075000249999931 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf897 3.92040413524 0 99.542500075 0.5474999249999911 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf898 2.47778695782 0 99.442000175 0.6479998249999938 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf899 2.50228643329 0 99.28750065 0.603749024999999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf900 6.30106886729 0 99.43350035 0.656499649999995 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf901 6.14799414721 0 99.49950045 0.5904995499999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf902 5.02870270579 0 99.47200005 0.6179999499999923 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf903 3.34244261096 0 99.613500575 0.4764994249999944 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf904 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf905 3.7862916372 0 99.41650005 
0.6734999500000015 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf906 6.14799414721 0 99.455000325 0.6349996749999974 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf907 2.50228643329 0 99.4335006 0.6564993999999956 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf908 6.14799414721 0 99.50550005 0.5844999500000029 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf909 2.47124761202 0 99.319999725 0.555000412499993 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf910 2.50228643329 0 99.42550005 0.6644999500000012 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf911 6.10789096832 0 99.56899955 0.5210004499999968 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf912 3.85964385182 0 99.48000055 0.609999449999998 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf913 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf914 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf915 2.44096937877 0 99.01699955 1.0095006750000053 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf916 4.90489779833 0 99.58049985 0.5095001500000024 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf917 2.47124761202 0 99.49900015 0.5909998499999972 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf918 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu 
mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf919 4.93072604433 0 99.5589997 0.5310002999999966 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf920 5.02870270579 0 99.473000225 0.6169997749999908 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf921 3.92040413524 0 99.55299925 0.5370007499999986 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf922 3.85964385182 0 99.53349935 0.5565006499999982 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf923 6.30106886729 0 99.472000025 0.6179999749999979 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf924 2.50228643329 0 99.182499825 0.7612502625000062 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf925 3.86059861244 0 99.60850085 0.4814991499999991 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf926 6.14799414721 0 99.412 0.6779999999999916 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf927 6.30106886729 0 99.4530003 0.6369996999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf928 6.30106886729 0 99.499999825 0.5900001749999945 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf929 6.10789096832 0 99.407000125 0.6829998750000016 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf930 2.47778695782 0 99.322999475 0.5505007874999919 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf931 6.30106886729 0 99.419000225 0.6709997749999929 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf932 3.80166404425 0 99.48450075 0.6054992500000026 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 
tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf933 2.50228643329 0 98.997999775 1.0380003375000015 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf934 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf935 6.30106886729 0 99.467999775 0.6220002250000022 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf936 2.47778695782 0 98.96700135 1.0844979749999908 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf937 3.77195447337 0 99.637001 0.4529989999999998 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf938 6.30106886729 0 99.512999625 0.5770003749999916 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf939 2.47778695782 0 98.9210002 1.1534997000000047 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf940 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf941 2.47124761202 0 99.3819992 0.4620012000000031 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf942 2.50228643329 0 99.393999775 0.6960002250000002 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf943 2.47778695782 0 99.4064995 0.6835005000000024 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf944 6.14799414721 0 99.41999975 0.6700002499999954 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf945 3.71567552873 0 99.5574991 0.5325008999999966 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf946 4.93072604433 0 99.57149935 
0.5185006500000015 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf947 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf948 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf949 3.92040413524 0 99.55049875 0.539501249999995 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf950 3.34244261096 0 99.5865 0.5034999999999968 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf951 3.38717868509 0 99.541499675 0.5485003250000006 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf952 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf953 2.50228643329 0 98.90750005 1.1737499250000027 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf954 2.44096937877 0 98.241500025 2.1727499625000064 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf955 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf956 2.50228643329 0 99.439500375 0.6504996250000034 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf957 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf958 3.38717868509 0 99.5725004 0.5174996000000022 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf959 2.50228643329 0 99.001500125 
1.0327498124999863 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf960 2.44096937877 0 99.376498775 0.470251837499994 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf961 3.92040413524 0 99.5484993 0.5415006999999946 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf962 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf963 3.7862916372 0 99.584000125 0.5059998749999949 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf964 2.47124761202 0 99.026499175 0.9952512374999998 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf965 3.80166404425 0 99.516499625 0.5735003750000033 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf966 3.85964385182 0 99.477000125 0.6129998749999942 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf967 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf968 3.86059861244 0 99.56150015 0.5284998499999972 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf969 6.14799414721 0 99.508499775 0.5815002249999935 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf970 2.50228643329 0 98.90099985 1.1835002249999889 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf971 1.99590274244 0 98.660004 1.5449939999999955 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf972 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 
21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf973 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf974 2.47124761202 0 98.536999 1.729501500000005 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf975 2.50228643329 0 98.23449995 2.1832500749999966 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf976 6.10789096832 0 99.406999775 0.683000224999995 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf977 6.14799414721 0 99.545499575 0.544500425000004 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf978 3.86059861244 0 99.61050155 0.4794984500000027 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf979 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf980 2.50228643329 0 99.4215 0.6685000000000031 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf981 3.7862916372 0 99.42750005 0.6624999499999916 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf982 3.34244261096 0 99.562499425 0.5275005749999991 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf983 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf984 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf985 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf986 5.92620561097 0 99.5314992 0.5585007999999988 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf987 2.78229733114 0 99.553500025 0.5364999749999925 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf988 2.47778695782 0 99.183000125 0.7604998124999867 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf989 3.86059861244 0 99.609000625 0.48099937500000467 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf990 3.86059861244 0 99.5019997 0.5880002999999988 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf991 6.30106886729 0 99.457000075 0.6329999250000015 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf992 3.84474688915 0 99.568499675 0.5215003249999995 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf993 6.14799414721 0 99.46 0.630000000000004 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf994 3.38717868509 0 99.3379988 0.5280018000000055 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf995 6.10789096832 0 99.577499425 0.5125005749999986 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf996 3.86059861244 0 99.5895003 0.5004997000000003 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf997 2.50228643329 0 99.37899965 0.4665005250000007 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf998 3.85964385182 0 99.475000975 0.6149990249999974 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf999 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 
add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1000 3.92040413524 0 99.559499525 0.530500474999991 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1001 6.30106886729 0 99.427999925 0.6620000750000031 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1002 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1003 2.50228643329 0 98.336499375 2.030250937499993 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1004 3.85964385182 0 99.4305002 0.6594998000000004 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1005 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1006 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1007 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1008 2.50228643329 0 99.2740006 0.623999100000006 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1009 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1010 5.02870270579 0 99.26250005 0.6412499249999968 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1011 2.50228643329 0 99.369499075 0.4807513875000069 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1012 5.02870270579 0 99.44500045 0.6449995500000029 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1013 2.50228643329 0 97.6395004 3.0757493999999923 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1014 5.02870270579 0 99.474500525 0.6154994750000015 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1015 3.92040413524 0 99.492000325 0.5979996749999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1016 3.92040413524 0 99.331499525 0.5377507124999994 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1017 6.30106886729 0 99.426500225 0.6634997749999997 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1018 3.38717868509 0 99.543499225 0.5465007749999927 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1019 3.38717868509 0 99.335999 0.531001499999995 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1020 3.92040413524 0 99.5434993 0.5465007000000043 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1021 2.50228643329 0 99.002499975 1.0312500374999871 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1022 3.38717868509 0 99.586500475 0.5034995250000037 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1023 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1024 5.02870270579 0 99.51149985 0.5785001499999908 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1025 3.85964385182 0 99.2685002 0.6322496999999885 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1026 2.81322619695 0 99.546999225 0.5430007750000044 -1 gpu 
conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1027 3.38717868509 0 99.47600085 0.6139991499999923 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1028 2.81322619695 0 99.588999925 0.5010000750000018 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1029 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1030 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1031 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1032 2.50228643329 0 98.9885 1.0522499999999937 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1033 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1034 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1035 6.30106886729 0 99.516000025 0.573999975000001 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1036 6.30106886729 0 99.2645006 0.6382490999999888 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1037 6.30106886729 0 99.45900045 0.6309995499999929 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1038 2.50228643329 0 99.369999225 0.48000116249999536 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1039 2.50228643329 0 99.37249935 0.47625097499999924 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1040 5.02870270579 0 99.4795001 0.6104999000000021 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1041 2.50228643329 0 99.469000975 0.6209990249999976 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1042 6.30106886729 0 99.45850035 0.6314996500000035 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1043 2.81322619695 0 99.33899865 0.5265020250000063 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1044 3.85964385182 0 99.531499475 0.5585005249999938 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1045 3.85964385182 0 99.470500225 0.6194997750000028 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1046 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1047 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1048 2.47124761202 0 99.19199955 0.7470006749999882 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1049 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1050 2.50228643329 0 99.43900005 0.6509999499999936 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1051 2.50228643329 0 99.035499225 0.9817511625000037 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1052 6.10789096832 0 99.575000075 0.5149999249999923 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1053 2.47124761202 0 99.26700025 0.6344996250000037 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1054 2.44096937877 0 99.011999775 1.0170003374999865 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1055 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1056 4.93072604433 0 99.5204999 0.5695000999999934 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1057 3.92040413524 0 99.576999675 0.5130003250000016 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1058 6.14799414721 0 99.550499275 0.5395007249999907 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1059 6.10789096832 0 99.41649995 0.6735000499999956 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1060 6.14799414721 0 99.45850075 0.6314992499999988 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1061 2.47778695782 0 99.001499775 1.0327503374999978 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1062 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1063 2.50228643329 0 99.419999775 0.670000225000004 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1064 2.50228643329 0 99.01399965 1.014000524999993 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1065 3.85964385182 0 99.457500475 0.6324995249999944 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1066 4.90489779833 0 99.413499575 
0.6765004249999947 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1067 2.50228643329 0 99.363998825 0.4890017625000027 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1068 2.47778695782 0 99.4510009 0.6389991000000009 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1069 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1070 2.44096937877 0 99.3414992 0.5227511999999948 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1071 3.92040413524 0 99.46549975 0.6245002499999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1072 6.14799414721 0 99.502499625 0.5875003749999991 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1073 5.02870270579 0 99.26700025 0.6344996250000037 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1074 6.10789096832 0 99.327999975 0.5430000375000006 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1075 3.7862916372 0 99.425500025 0.6644999749999926 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1076 3.80166404425 0 99.519499 0.5705010000000016 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1077 2.47778695782 0 99.3090006 0.5714990999999898 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1078 2.78229733114 0 99.5504996 0.5395004000000029 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1079 6.14799414721 0 99.5105 0.5795000000000045 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 
1 ------ -+++++ -conf1080 2.47124761202 0 99.356998675 0.49950198749999686 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1081 3.92040413524 0 99.551499975 0.538500024999999 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1082 3.92040413524 0 99.5609997 0.5290003000000013 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1083 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1084 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1085 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1086 3.80166404425 0 99.5354992 0.5545007999999939 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1087 6.30106886729 0 99.42449985 0.6655001499999941 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1088 6.30106886729 0 99.510999925 0.5790000750000047 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1089 2.47778695782 0 99.405999625 0.6840003749999909 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1090 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1091 3.38717868509 0 99.54649955 0.5435004500000048 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1092 6.14799414721 0 99.4960001 0.5939998999999944 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1093 6.30106886729 0 99.46200015 0.6279998500000034 -1 gpu conv perf 
22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1094 6.30106886729 0 99.263000125 0.6404998124999892 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1095 6.30106886729 0 99.469499875 0.620500125000001 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1096 2.81322619695 0 99.576999575 0.5130004249999957 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1097 3.38717868509 0 99.3319991 0.5370013499999899 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1098 3.86059861244 0 99.6120008 0.47799919999999363 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1099 5.02870270579 0 99.467000225 0.622999774999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1100 4.93072604433 0 99.51999945 0.5700005499999975 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1101 3.86059861244 0 99.5900002 0.499999799999992 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1102 2.50228643329 0 99.0934995 0.8947507500000071 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1103 6.14799414721 0 99.41949945 0.6705005499999942 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1104 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1105 3.7862916372 0 99.331499775 0.5377503375000003 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1106 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1107 5.92620561097 0 
99.541999675 0.5480003249999982 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1108 2.77405457184 0 99.5785001 0.5114998999999983 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1109 2.50228643329 0 99.39249935 0.6975006500000035 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1110 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1111 5.02870270579 0 99.5159995 0.5740004999999911 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1112 6.14799414721 0 99.465999875 0.6240001250000035 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1113 2.50228643329 0 99.415999925 0.6740000750000036 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1114 3.34244261096 0 99.60100035 0.488999649999991 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1115 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1116 3.92040413524 0 99.328499625 0.5422505624999872 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1117 3.85964385182 0 99.52999935 0.5600006500000007 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1118 2.50228643329 0 99.098500425 0.8872493625000004 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1119 6.30106886729 0 99.458000075 0.6319999249999967 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1120 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 
-3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1121 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1122 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1123 6.30106886729 0 99.507499575 0.5825004250000007 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1124 3.27579123647 0 99.64050075 0.4494992499999967 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1125 6.14799414721 0 99.49900015 0.5909998499999972 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1126 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1127 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1128 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1129 3.38717868509 0 99.333498075 0.5347528875000052 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1130 3.86059861244 0 99.5950008 0.49499920000000375 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1131 2.78229733114 0 99.580499625 0.5095003749999961 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1132 3.34244261096 0 99.5804993 0.5095006999999981 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1133 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 
tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1134 3.86059861244 0 99.592499975 0.49750002500000223 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1135 3.86059861244 0 99.61300085 0.4769991499999918 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1136 6.30106886729 0 99.429000375 0.6609996249999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1137 1.97610564729 0 98.440002 1.8749969999999863 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1138 2.47778695782 0 99.402999625 0.687000374999991 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1139 3.80166404425 0 99.51099975 0.5790002500000014 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1140 2.81322619695 0 99.5854998 0.504500200000004 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1141 2.50228643329 0 99.038999575 0.9765006374999885 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1142 6.30106886729 0 99.4325004 0.6574996000000027 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1143 6.10789096832 0 99.4115002 0.6784997999999917 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1144 6.10789096832 0 99.332999725 0.5355004125000065 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1145 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1146 6.14799414721 0 99.428500075 0.6614999249999954 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1147 3.80166404425 0 99.567500275 0.5224997249999973 -1 gpu conv perf 29 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1148 2.50228643329 0 98.29800005 2.0879999249999983 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1149 3.86059861244 0 99.4985004 0.5914996000000002 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1150 3.33055390722 0 99.43350015 0.6564998499999973 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1151 6.30106886729 0 99.477000475 0.6129995250000008 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1152 6.14799414721 0 99.495000225 0.5949997749999995 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1153 2.47778695782 0 98.9710005 1.078499249999993 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1154 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1155 2.44096937877 0 99.014999325 1.012501012499989 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1156 4.93072604433 0 99.483500225 0.6064997749999975 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1157 2.47778695782 0 99.42549995 0.6645000499999952 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1158 2.78229733114 0 99.5529994 0.5370005999999933 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1159 2.78229733114 0 99.605500375 0.48449962499999233 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1160 3.92040413524 0 99.55899965 0.5310003499999937 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu 
softmax fp16 1 ------ -+++++ -conf1161 5.02870270579 0 99.4565008 0.6334991999999972 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1162 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1163 2.47124761202 0 99.3154995 0.5617507499999945 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1164 6.10789096832 0 99.579999725 0.5100002750000044 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1165 6.30106886729 0 99.262000125 0.6419998124999964 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1166 3.86059861244 0 99.5529991 0.5370009000000039 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1167 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1168 5.02870270579 0 99.253000425 0.6554993625000023 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1169 3.92040413524 0 99.581499875 0.508500124999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1170 2.50228643329 0 99.467500525 0.6224994749999923 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1171 2.50228643329 0 98.75049975 1.409250374999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1172 3.38717868509 0 99.5780001 0.5119999000000007 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1173 6.30106886729 0 99.467501075 0.6224989249999965 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1174 5.02870270579 0 99.473000175 0.6169998250000021 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1175 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1176 2.50228643329 0 99.3804997 0.46425044999999443 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1177 2.50228643329 0 99.0319994 0.9870008999999911 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1178 3.38717868509 0 99.5639997 0.5260003000000012 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1179 2.50228643329 0 99.185500475 0.7567492874999999 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1180 3.85964385182 0 99.27000015 0.6299997749999946 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1181 6.30106886729 0 99.43400015 0.6559998499999949 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1182 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1183 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1184 2.50228643329 0 99.3659993 0.4860010499999987 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1185 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1186 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1187 3.92040413524 0 99.31799965 0.55800052499999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1188 3.92040413524 0 99.489999925 0.6000000749999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1189 2.50228643329 0 99.394499425 0.6955005749999913 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1190 3.92040413524 0 99.560999775 0.5290002249999987 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1191 6.30106886729 0 99.435999925 0.6540000749999934 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1192 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1193 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1194 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1195 6.30106886729 0 99.455999775 0.6340002250000026 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1196 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1197 3.85964385182 0 99.466000075 0.6239999250000011 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1198 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1199 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1200 6.30106886729 0 99.50349965 0.5865003500000029 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise 
swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1201 3.38717868509 0 99.476000475 0.6139995249999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1202 2.50228643329 0 98.9125001 1.1662498499999927 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1203 3.92040413524 0 99.55849915 0.5315008499999948 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1204 2.50228643329 0 97.895000075 2.6924998874999986 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1205 2.50228643329 0 99.373498975 0.47475153749999066 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1206 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1207 5.02870270579 0 99.451001075 0.6389989250000042 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1208 2.50228643329 0 99.015499925 1.0117501124999961 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1209 2.50228643329 0 98.9950004 1.042499400000004 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1210 2.81322619695 0 99.5870001 0.5029999000000004 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1211 2.50228643329 0 99.468000825 0.6219991749999935 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1212 2.50228643329 0 99.44400025 0.6459997499999958 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1213 3.38717868509 0 99.581000225 0.5089997750000009 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 
tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1214 5.02870270579 0 99.519500225 0.5704997749999962 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1215 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1216 2.81322619695 0 99.5439997 0.5460002999999972 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1217 2.50228643329 0 99.472000875 0.6179991249999915 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1218 2.50228643329 0 99.36899875 0.4815018749999922 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1219 6.30106886729 0 99.263999775 0.6390003374999935 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1220 6.30106886729 0 99.45150035 0.6384996499999943 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1221 3.85964385182 0 99.2720009 0.6269986500000044 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1222 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1223 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1224 2.47124761202 0 99.3569989 0.49950165000000624 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1225 2.47778695782 0 99.005000125 1.0274998125000039 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1226 3.85964385182 0 99.482000425 0.6079995750000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1227 2.50228643329 0 99.193499925 
0.7447501125000002 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1228 5.02870270579 0 99.440000575 0.6499994249999986 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1229 3.85964385182 0 99.535499725 0.5545002750000038 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1230 3.7862916372 0 99.426000075 0.6639999249999932 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1231 6.14799414721 0 99.511999775 0.578000224999991 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1232 6.30106886729 0 99.434500575 0.6554994249999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1233 3.77195447337 0 99.6400006 0.4499994000000044 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1234 3.80166404425 0 99.526999625 0.5630003749999958 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1235 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1236 2.47778695782 0 99.029999325 0.9900010124999881 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1237 5.02870270579 0 99.482000275 0.6079997249999934 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1238 2.47778695782 0 99.403499575 0.6865004249999999 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1239 6.10789096832 0 99.323999075 0.5490013874999917 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1240 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu 
softmax fp16 1 ------ -+++++ -conf1241 2.77405457184 0 99.42650065 0.6634993500000036 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1242 2.50228643329 0 99.438000575 0.651999424999994 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1243 6.30106886729 0 99.243999875 0.6690001874999965 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1244 6.14799414721 0 99.4244999 0.665500099999997 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1245 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1246 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1247 2.50228643329 0 99.02999865 0.9900020250000026 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1248 3.34244261096 0 99.508000025 0.5819999749999966 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1249 2.47778695782 0 99.48050135 0.6094986500000005 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1250 2.44096937877 0 99.0244998 0.9982502999999951 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1251 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1252 2.47124761202 0 99.38599915 0.4560012749999913 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1253 6.30106886729 0 99.432500225 0.6574997749999995 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1254 3.33055390722 
0 99.574000275 0.5159997249999947 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1255 5.02870270579 0 99.268000775 0.63299883749999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1256 3.92040413524 0 99.543499525 0.5465004749999963 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1257 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1258 6.14799414721 0 99.498500225 0.591499774999997 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1259 3.85964385182 0 99.48400015 0.6059998499999978 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1260 2.50228643329 0 99.2885006 0.6022490999999874 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1261 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1262 5.92620561097 0 99.547499375 0.5425006249999967 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1263 4.93072604433 0 99.42350015 0.6664998500000024 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1264 6.30106886729 0 99.5030001 0.5869999000000036 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1265 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1266 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1267 1.99590274244 0 99.099998 0.8850029999999975 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 
tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1268 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1269 6.30106886729 0 99.46300005 0.6269999499999926 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1270 2.50228643329 0 99.42400025 0.6659997499999918 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1271 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1272 3.38717868509 0 99.5705005 0.5194995000000034 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1273 3.86059861244 0 99.622001025 0.46799897499999477 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1274 6.14799414721 0 99.553999775 0.5360002250000037 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1275 6.14799414721 0 99.5039994 0.5860005999999999 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1276 2.81322619695 0 99.59050025 0.4994997499999926 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1277 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1278 6.14799414721 0 99.45850055 0.6314994500000012 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1279 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1280 2.50228643329 0 98.999500425 1.035749362500006 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1281 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1282 2.47124761202 0 99.37799915 0.468001275000006 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1283 4.90489779833 0 99.584500425 0.5054995749999961 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1284 3.80166404425 0 99.423000075 0.6669999249999933 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1285 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1286 5.92620561097 0 99.540499425 0.5495005750000047 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1287 6.30106886729 0 99.47250045 0.6174995499999995 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1288 3.92040413524 0 99.551499425 0.5385005749999948 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1289 2.47124761202 0 98.8219998 1.302000299999996 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1290 4.90489779833 0 99.412499525 0.6775004749999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1291 2.78229733114 0 99.517998175 0.5720018249999953 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1292 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1293 2.47778695782 0 99.43750055 0.652499450000002 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1294 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 
21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1295 3.38717868509 0 99.474000925 0.6159990749999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1296 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1297 6.10789096832 0 99.406000225 0.6839997749999981 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1298 2.47778695782 0 98.236000025 2.180999962500003 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1299 2.50228643329 0 99.004499675 1.0282504874999887 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1300 2.81322619695 0 99.585500025 0.504499974999996 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1301 2.77405457184 0 99.4325001 0.6574998999999991 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1302 2.50228643329 0 98.298500325 2.0872495124999872 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1303 3.77195447337 0 99.63650085 0.4534991499999933 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1304 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1305 2.47778695782 0 98.9780005 1.0679992500000068 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1306 2.47124761202 0 99.11650015 0.8602497750000069 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1307 6.30106886729 0 99.464500275 0.6254997249999917 -1 gpu conv perf 22 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1308 3.7862916372 0 99.584499975 0.5055000249999978 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1309 3.80166404425 0 99.5214992 0.5685008000000039 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1310 2.50228643329 0 99.28400045 0.6089993250000063 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1311 6.14799414721 0 99.515500125 0.5744998749999951 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1312 3.85964385182 0 99.2660004 0.6359994000000029 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1313 2.47124761202 0 99.192499825 0.7462502624999985 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1314 2.47124761202 0 99.2194988 0.7057518000000016 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1315 6.14799414721 0 99.411000025 0.6789999749999908 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1316 6.30106886729 0 99.501499425 0.588500574999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1317 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1318 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1319 2.50228643329 0 98.240000175 2.174999737499988 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1320 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1321 2.50228643329 0 99.466000925 0.6239990749999947 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1322 6.14799414721 0 99.503499825 0.586500174999992 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1323 2.47778695782 0 99.322 0.5519999999999925 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1324 2.50228643329 0 99.371498925 0.47775161250000053 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1325 2.47778695782 0 99.42149925 0.6685007500000012 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1326 6.30106886729 0 99.26800005 0.632999925 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1327 6.14799414721 0 99.492500075 0.5974999250000025 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1328 5.02870270579 0 99.4635003 0.6264996999999909 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1329 2.78229733114 0 99.5604995 0.5295004999999918 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1330 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1331 2.50228643329 0 99.438000275 0.6519997250000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1332 3.92040413524 0 99.329498225 0.5407526625000045 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1333 2.50228643329 0 99.473000725 0.6169992749999921 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1334 1.97610564729 0 98.480003 1.814995500000002 -1 gpu conv fp16 1 
add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1335 6.30106886729 0 99.4390005 0.6509994999999918 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1336 6.14799414721 0 99.555500175 0.5344998249999918 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1337 4.78704248134 0 99.554999825 0.5350001750000019 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1338 2.47124761202 0 99.034499725 0.9832504124999915 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1339 2.47124761202 0 99.35549875 0.5017518750000036 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1340 2.50228643329 0 98.7550001 1.402499849999991 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1341 2.50228643329 0 98.317499175 2.0587512375000045 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1342 3.92040413524 0 99.476000275 0.6139997249999937 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1343 2.50228643329 0 99.396999175 0.6930008249999929 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1344 2.50228643329 0 99.280500325 0.6142495124999883 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1345 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1346 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1347 2.50228643329 0 98.8924994 1.1962508999999883 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 
tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1348 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1349 2.50228643329 0 99.37049845 0.47925232499999737 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1350 2.50228643329 0 98.7390001 1.426499849999999 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1351 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1352 6.30106886729 0 99.46000045 0.6299995500000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1353 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1354 5.02870270579 0 99.27500045 0.6224993250000068 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1355 3.92040413524 0 99.321999375 0.5520009374999901 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1356 3.85964385182 0 99.478000325 0.6119996750000013 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1357 2.50228643329 0 99.18700025 0.7544996250000011 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1358 3.38717868509 0 99.580999825 0.5090001749999914 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1359 6.30106886729 0 99.258000725 0.6479989124999932 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1360 3.85964385182 0 99.273500675 0.6247489875000056 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1361 3.85964385182 0 99.528999325 0.5610006749999968 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1362 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1363 2.50228643329 0 99.386499 0.4552514999999957 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1364 2.81322619695 0 99.58850025 0.5014997500000021 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1365 3.38717868509 0 99.471000175 0.6189998249999974 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1366 6.30106886729 0 99.44 0.65 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1367 3.92040413524 0 99.54600005 0.5439999499999942 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1368 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1369 2.81322619695 0 99.331999075 0.5370013874999984 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1370 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1371 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1372 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1373 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu 
softmax fp16 1 ------ -+++++ -conf1374 2.50228643329 0 98.237999725 2.1780004125000048 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1375 2.50228643329 0 99.369999825 0.48000026250000616 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1376 3.38717868509 0 99.5664995 0.5235004999999916 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1377 6.30106886729 0 99.4435005 0.6464994999999988 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1378 2.50228643329 0 98.989500525 1.0507492125000013 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1379 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1380 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1381 3.92040413524 0 99.566499725 0.5235002749999978 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1382 3.38717868509 0 99.542499525 0.5475004750000011 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1383 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1384 3.92040413524 0 99.560999575 0.529000425000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1385 6.30106886729 0 99.4365008 0.6534991999999932 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1386 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1387 
2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1388 6.30106886729 0 99.50550005 0.5844999500000029 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1389 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1390 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1391 2.50228643329 0 99.185499625 0.7567505624999882 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1392 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1393 6.30106886729 0 99.257000175 0.649499737499994 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1394 5.02870270579 0 99.2665001 0.6352498499999939 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1395 3.84474688915 0 99.428999625 0.6610003749999948 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1396 2.47124761202 0 99.218499125 0.7072513125000057 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1397 6.10789096832 0 99.3400001 0.5249998500000004 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1398 2.81322619695 0 99.581499675 0.5085003249999943 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1399 6.14799414721 0 99.5084996 0.5815004000000045 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1400 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1401 2.47778695782 0 99.4290001 0.6609999000000016 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1402 2.50228643329 0 99.4385002 0.6514998000000048 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1403 2.50228643329 0 99.01299965 1.0155005250000002 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1404 1.97610564729 0 99.080002 0.9149970000000067 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1405 3.86059861244 0 99.59099995 0.4990000500000008 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1406 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1407 2.47124761202 0 99.321500075 0.5527498874999921 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1408 3.92040413524 0 99.55849965 0.5315003499999961 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1409 2.47778695782 0 99.33599825 0.5310026249999922 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1410 2.44096937877 0 99.022499775 1.0012503374999966 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1411 3.86059861244 0 99.6165006 0.4734994000000029 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1412 2.47778695782 0 99.3005006 0.5842490999999868 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1413 2.81322619695 0 99.5374991 0.5525008999999926 -1 gpu conv perf 22 add fp16 
1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1414 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1415 1.99590274244 0 99.580002 0.5099980000000045 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1416 5.02870270579 0 99.469500425 0.620499574999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1417 6.14799414721 0 99.461499925 0.6285000749999995 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1418 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1419 2.50228643329 0 99.413499975 0.6765000250000043 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1420 3.92040413524 0 99.541999425 0.5480005749999975 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1421 2.47778695782 0 99.026499675 0.9952504875000017 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1422 6.30106886729 0 99.4355002 0.6544997999999908 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1423 2.81322619695 0 99.46250045 0.6274995500000046 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1424 2.44096937877 0 99.44600045 0.6439995499999981 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1425 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1426 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 
1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1427 6.14799414721 0 99.55749925 0.5325007499999913 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1428 2.47124761202 0 99.362998825 0.49050176249998856 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1429 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1430 6.14799414721 0 99.418499925 0.6715000749999916 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1431 3.34244261096 0 99.6115011 0.4784988999999996 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1432 2.47778695782 0 99.425000225 0.6649997749999926 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1433 6.30106886729 0 99.464500175 0.625499825 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1434 6.14799414721 0 99.49900035 0.5909996499999949 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1435 2.50228643329 0 99.09000035 0.8999994750000013 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1436 2.47778695782 0 98.9905002 1.0492496999999972 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1437 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1438 6.30106886729 0 99.4660008 0.6239991999999944 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1439 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1440 6.10789096832 0 99.4094999 0.6805000999999976 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1441 3.92040413524 0 99.3284995 0.5422507499999867 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1442 6.30106886729 0 99.515500125 0.5744998749999951 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1443 2.78229733114 0 99.6075006 0.48249940000000324 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1444 2.47124761202 0 99.386499275 0.4552510874999882 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1445 4.93072604433 0 99.42349955 0.6665004499999952 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1446 3.38717868509 0 99.3259999 0.5460001499999976 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1447 3.38717868509 0 99.570499925 0.5195000750000048 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1448 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1449 6.10789096832 0 99.58400005 0.5059999499999975 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1450 3.80166404425 0 99.525999425 0.5640005750000029 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1451 3.7862916372 0 99.4165003 0.6734997000000021 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1452 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1453 6.30106886729 0 99.502499825 0.5875001749999967 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise 
swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1454 2.47778695782 0 99.323499175 0.5497512375000042 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1455 2.47778695782 0 99.047999525 0.9630007125000049 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1456 6.30106886729 0 99.425499675 0.6645003250000002 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1457 2.50228643329 0 98.892999075 1.1955013874999878 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1458 2.44096937877 0 99.398499325 0.6915006749999947 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1459 2.81322619695 0 99.590000925 0.4999990749999995 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1460 3.33055390722 0 99.73649975 0.35350025000000473 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1461 2.47778695782 0 99.26500055 0.6374991750000021 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1462 6.14799414721 0 99.4905 0.5995000000000005 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1463 3.92040413524 0 99.54349955 0.5465004499999907 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1464 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1465 2.50228643329 0 99.377499075 0.46875138749999223 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1466 2.00016617632 0 99.0 1.0349999999999966 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1467 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1468 2.47124761202 0 99.267000075 0.6344998874999987 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1469 2.50228643329 0 99.283000625 0.6104990624999971 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1470 2.47778695782 0 99.420999375 0.6690006250000039 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1471 2.50228643329 0 99.3724994 0.4762509000000037 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1472 6.14799414721 0 99.413499925 0.6765000750000013 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1473 1.99590274244 0 98.139999 2.325001499999992 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1474 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1475 6.30106886729 0 99.445500575 0.6444994250000008 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1476 6.10789096832 0 99.399999175 0.6900008249999928 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1477 2.50228643329 0 98.23200065 2.1869990249999915 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1478 3.34244261096 0 99.574999975 0.5150000250000005 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1479 2.47124761202 0 99.022999675 1.0005004875000054 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf1480 2.78229733114 0 99.5164984 0.5735015999999945 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1481 2.77405457184 0 99.577998975 0.5120010249999979 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1482 5.02870270579 0 99.273000325 0.6254995124999994 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1483 6.30106886729 0 99.44949995 0.6405000499999943 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1484 2.50228643329 0 98.908000625 1.172999062499997 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1485 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1486 2.81322619695 0 99.587999675 0.5020003249999917 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1487 2.00016617632 0 99.660004 0.42999599999999705 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1488 3.85964385182 0 99.2625002 0.6412496999999888 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1489 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1490 2.81322619695 0 99.331499075 0.537751387500002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1491 3.86059861244 0 99.5910002 0.49899980000000144 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1492 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1493 2.47778695782 0 98.999500075 1.0357498874999962 -1 gpu 
conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1494 3.34244261096 0 99.616000475 0.47399952499999076 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1495 3.85964385182 0 99.484999975 0.6050000250000039 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1496 3.92040413524 0 99.5539999 0.536000100000004 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1497 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1498 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1499 2.47778695782 0 99.0249994 0.9975008999999986 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1500 4.90489779833 0 99.41399985 0.6760001500000016 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1501 2.78229733114 0 99.554999075 0.535000925 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1502 5.92620561097 0 99.5504994 0.539500599999991 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1503 1.99590274244 0 99.260002 0.6449969999999965 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1504 2.47124761202 0 99.310500825 0.5692487624999885 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1505 2.81322619695 0 99.538999575 0.5510004249999924 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1506 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1507 6.30106886729 0 99.467000075 0.6229999249999963 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1508 2.47778695782 0 99.39350025 0.6964997499999953 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1509 6.10789096832 0 99.579499625 0.5105003750000009 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1510 4.93072604433 0 99.5169999 0.5730000999999959 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1511 2.50228643329 0 99.3804993 0.46425105000000144 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1512 5.02870270579 0 99.475000625 0.6149993749999908 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1513 2.47778695782 0 98.93350015 1.134749774999996 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1514 6.14799414721 0 99.503000075 0.586999924999995 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1515 5.02870270579 0 99.458999975 0.6310000250000002 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1516 2.50228643329 0 99.0304991 0.9892513499999964 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1517 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1518 3.92040413524 0 99.554498975 0.5355010249999964 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1519 3.92040413524 0 99.566999875 0.5230001250000044 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1520 6.30106886729 0 99.474499975 
0.6155000249999972 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1521 3.92040413524 0 99.3329993 0.5355010500000006 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1522 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1523 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1524 6.30106886729 0 99.502499275 0.5875007249999925 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1525 6.30106886729 0 99.4525002 0.6374997999999948 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1526 2.50228643329 0 98.3025 2.0812500000000043 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1527 6.30106886729 0 99.253999525 0.6540007125000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1528 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1529 2.50228643329 0 99.3794995 0.4657507500000051 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1530 2.81322619695 0 99.3324994 0.5362508999999918 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1531 6.30106886729 0 99.4319996 0.6580004000000003 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1532 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1533 2.50228643329 0 99.4100001 0.6799998999999929 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 
pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1534 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1535 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1536 5.02870270579 0 99.476500375 0.6134996249999972 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1537 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1538 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1539 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1540 2.81322619695 0 99.5905005 0.4994994999999932 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1541 2.50228643329 0 99.394999125 0.6950008749999995 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1542 2.50228643329 0 97.880498675 2.714251987499999 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1543 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1544 2.50228643329 0 99.3764997 0.47025045000000176 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1545 2.50228643329 0 99.371499825 0.4777502624999954 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1546 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 
21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1547 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1548 3.85964385182 0 99.26600075 0.6359988749999914 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1549 2.50228643329 0 97.659501225 3.045748162499997 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1550 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1551 2.50228643329 0 98.23250025 2.186249624999995 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1552 2.50228643329 0 99.00199985 1.0320002249999902 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1553 2.50228643329 0 99.37399865 0.4740020249999901 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1554 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1555 2.50228643329 0 99.102000425 0.8819993624999967 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1556 3.85964385182 0 99.4645003 0.6254997000000003 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1557 2.50228643329 0 99.281000925 0.6134986124999955 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1558 6.30106886729 0 99.4530003 0.6369996999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1559 2.50228643329 0 99.187000725 0.7544989124999901 -1 gpu conv samp 32 add fp16 
1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1560 2.50228643329 0 98.898499075 1.187251387499991 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1561 2.50228643329 0 98.736500275 1.4302495875000005 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1562 3.38717868509 0 99.573999575 0.5160004249999958 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1563 5.02870270579 0 99.513499725 0.5765002749999951 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1564 2.47778695782 0 98.9680005 1.0829992499999932 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1565 3.33055390722 0 99.737999875 0.3520001249999979 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1566 2.47778695782 0 99.034999825 0.9825002624999968 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1567 2.50228643329 0 99.42649985 0.6635001499999987 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1568 2.78229733114 0 99.512998925 0.5770010749999926 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1569 2.78229733114 0 99.57799995 0.5120000499999918 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1570 4.78704248134 0 99.5660001 0.5239999000000012 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1571 3.85964385182 0 99.475500875 0.6144991250000033 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1572 2.50228643329 0 99.3974997 0.6925003000000004 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1573 6.10789096832 0 99.57150015 0.5184998499999921 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1574 2.81322619695 0 99.542499525 0.5475004750000011 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1575 2.50228643329 0 99.44250095 0.6474990500000019 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1576 6.14799414721 0 99.460000325 0.629999675000002 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1577 2.50228643329 0 99.00200005 1.0319999249999867 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1578 2.47778695782 0 99.050499675 0.9592504875000003 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1579 3.92040413524 0 99.55249935 0.5375006499999927 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1580 6.30106886729 0 99.454000825 0.6359991750000035 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1581 3.92040413524 0 99.5760004 0.5139995999999997 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1582 2.00016617632 0 99.199997 0.7350045000000023 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1583 6.14799414721 0 99.54849945 0.5415005500000035 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1584 2.47124761202 0 99.2154998 0.7117502999999914 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1585 2.81322619695 0 99.57649945 0.5135005499999977 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1586 2.73595882486 0 99.639001225 0.4509987749999965 -1 gpu conv fp16 1 add 
fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1587 2.47778695782 0 99.4405004 0.649499599999993 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1588 2.47124761202 0 99.503500175 0.5864998249999985 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1589 3.80166404425 0 99.47900055 0.6109994500000028 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1590 2.50228643329 0 99.404499675 0.685500325000001 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1591 2.47124761202 0 99.3180001 0.5579998499999874 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1592 3.71567552873 0 99.5499996 0.540000399999991 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1593 2.47124761202 0 99.273500675 0.6247489875000056 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1594 2.50228643329 0 99.026499175 0.9952512374999998 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1595 3.85964385182 0 99.467500875 0.6224991249999988 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1596 3.38717868509 0 99.567999575 0.522000424999996 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1597 3.34244261096 0 99.6130008 0.47699920000000307 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1598 3.80166404425 0 99.52149975 0.5685002499999939 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1599 6.10789096832 0 99.331499675 0.5377504874999914 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf1600 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1601 5.02870270579 0 99.5239992 0.566000799999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1602 3.77195447337 0 99.628501325 0.46149867499999575 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1603 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1604 3.80166404425 0 99.5234991 0.5665009000000026 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1605 6.14799414721 0 99.500499575 0.5895004249999914 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1606 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1607 2.47124761202 0 99.386499275 0.4552510874999882 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1608 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1609 1.99590274244 0 99.599998 0.4900019999999984 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1610 2.50228643329 0 99.00950015 1.0207497750000059 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1611 2.47778695782 0 99.40549985 0.6845001499999995 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1612 3.85964385182 0 99.473500825 0.6164991749999956 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1613 
2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1614 3.86059861244 0 99.506499875 0.5835001249999948 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1615 2.47778695782 0 99.397499725 0.6925002749999948 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1616 5.92620561097 0 99.544999775 0.545000225000004 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1617 6.30106886729 0 99.473999525 0.6160004750000013 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1618 3.86059861244 0 99.5965 0.4934999999999917 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1619 6.14799414721 0 99.5010002 0.5889998000000048 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1620 5.02870270579 0 99.4765004 0.6134995999999916 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1621 3.38717868509 0 99.541999425 0.5480005749999975 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1622 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1623 2.78229733114 0 99.615500325 0.47449967499999846 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1624 6.30106886729 0 99.502500325 0.587499674999998 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1625 6.30106886729 0 99.43000005 0.6599999499999939 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1626 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 
1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1627 6.30106886729 0 99.26200065 0.6419990249999898 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1628 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1629 4.90489779833 0 99.586500025 0.5034999749999912 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1630 4.93072604433 0 99.510999575 0.5790004249999982 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1631 2.78229733114 0 99.614000375 0.4759996249999944 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1632 3.84474688915 0 99.43100005 0.6589999500000033 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1633 2.47778695782 0 99.2955008 0.5917487999999977 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1634 2.50228643329 0 99.184499825 0.7582502624999918 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1635 2.47778695782 0 99.329499525 0.5407507124999924 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1636 4.93072604433 0 99.52249995 0.567500050000001 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1637 3.33055390722 0 99.576000025 0.5139999749999987 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1638 3.38717868509 0 99.583000075 0.5069999249999967 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1639 3.34244261096 0 99.50749975 0.5825002500000039 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1640 3.92040413524 0 99.558499675 0.5315003250000047 -1 gpu conv samp 32 add 
fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1641 3.86059861244 0 99.595000075 0.49499992499999623 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1642 2.47778695782 0 99.437500275 0.6524997249999928 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1643 2.50228643329 0 99.373499575 0.47475063750000146 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1644 1.99590274244 0 98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1645 6.14799414721 0 99.50299975 0.587000249999997 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1646 2.47124761202 0 99.1220011 0.8519983499999881 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1647 2.81322619695 0 99.470500925 0.6194990750000017 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1648 2.47124761202 0 98.3239998 2.049000300000003 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1649 2.00016617632 0 98.980003 1.064995500000002 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1650 2.47778695782 0 99.41799995 0.6720000500000026 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1651 3.71567552873 0 99.553999325 0.5360006749999912 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1652 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1653 2.77405457184 0 99.738499275 0.3515007250000025 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1654 5.02870270579 0 99.255999975 0.6510000375000047 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1655 2.50228643329 0 98.902999675 1.180500487499991 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1656 6.30106886729 0 99.258499925 0.6472501125000036 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1657 3.27579123647 0 99.64500205 0.44499794999999553 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1658 2.47778695782 0 98.93350115 1.1347482749999998 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1659 2.81322619695 0 99.333498825 0.5347517624999867 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1660 6.30106886729 0 99.460500425 0.6294995749999913 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1661 2.50228643329 0 99.365498975 0.4867515375000053 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1662 5.02870270579 0 99.467001175 0.6229988250000048 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1663 3.85964385182 0 99.47449985 0.6155001499999969 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1664 3.92040413524 0 99.479499575 0.6105004249999922 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1665 4.78704248134 0 99.5624994 0.5275006000000048 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1666 3.85964385182 0 99.468500375 0.6214996249999928 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1667 6.30106886729 0 99.4335002 0.6564998000000003 -1 gpu conv perf 26 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1668 3.80166404425 0 99.41949975 0.6705002499999978 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1669 2.81322619695 0 99.573000475 0.5169995249999971 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1670 2.47778695782 0 99.38749925 0.4537511249999895 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1671 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1672 3.85964385182 0 99.27349995 0.6247500749999944 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1673 2.50228643329 0 99.28100055 0.6134991749999941 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1674 6.14799414721 0 99.507500275 0.5824997249999996 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1675 6.10789096832 0 99.420499775 0.6695002250000016 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1676 4.90489779833 0 99.34499945 0.517500824999992 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1677 6.10789096832 0 99.3319992 0.5370011999999988 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1678 3.38717868509 0 99.46999995 0.6200000499999959 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1679 2.47124761202 0 98.53199905 1.7370014250000025 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1680 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1681 
2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1682 2.78229733114 0 99.615001075 0.47499892500000274 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1683 2.47778695782 0 99.47850095 0.6114990500000005 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1684 6.30106886729 0 99.50049995 0.5895000499999924 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1685 2.50228643329 0 99.469001675 0.6209983249999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1686 2.00016617632 0 99.620003 0.4699970000000008 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1687 1.99590274244 0 98.440002 1.8749969999999863 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1688 6.14799414721 0 99.46750035 0.6224996500000032 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1689 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1690 4.93072604433 0 99.4275003 0.6624996999999923 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1691 6.14799414721 0 99.422500325 0.6674996749999963 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1692 3.80166404425 0 99.48150015 0.6084998499999955 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1693 6.30106886729 0 99.465000225 0.6249997750000006 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1694 2.78229733114 0 99.554499175 0.5355008249999941 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1695 2.81322619695 0 99.594500125 0.4954998750000016 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1696 2.50228643329 0 99.28250105 0.6112484250000065 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1697 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1698 3.38717868509 0 99.541999225 0.5480007749999999 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1699 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1700 6.30106886729 0 99.451500025 0.6384999749999963 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1701 3.7862916372 0 99.32999935 0.5400009750000052 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1702 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1703 2.47124761202 0 99.1944995 0.7432507499999872 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1704 2.00016617632 0 98.580002 1.6649970000000067 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1705 2.47124761202 0 98.273000575 2.1254991375000003 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1706 3.80166404425 0 99.573999925 0.5160000750000023 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1707 2.47124761202 0 98.8214997 1.3027504499999907 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh 
fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1708 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1709 3.38717868509 0 99.33449965 0.5332505249999997 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1710 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1711 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1712 2.50228643329 0 98.914500175 1.1632497374999957 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1713 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1714 6.30106886729 0 99.4310002 0.658999799999998 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1715 2.01610051566 0 98.459999 1.8450015000000022 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1716 2.50228643329 0 99.474000575 0.6159994249999926 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1717 3.92040413524 0 99.478000175 0.6119998249999924 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1718 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1719 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1720 5.02870270579 0 99.51599985 0.5740001499999977 -1 gpu conv perf 21 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1721 6.30106886729 0 99.4455001 0.6444998999999939 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1722 2.50228643329 0 99.367498375 0.4837524375000015 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1723 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1724 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1725 2.50228643329 0 99.029999175 0.990001237499996 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1726 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1727 2.81322619695 0 99.4665007 0.6234993000000003 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1728 5.02870270579 0 99.4305001 0.6594998999999945 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1729 2.50228643329 0 99.09350035 0.8947494749999976 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1730 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1731 6.30106886729 0 99.2665001 0.6352498499999939 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1732 2.50228643329 0 98.74949935 1.4107509750000062 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1733 6.30106886729 0 99.467000275 0.622999724999994 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise 
swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1734 3.38717868509 0 99.47200055 0.6179994499999936 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1735 2.50228643329 0 98.994999875 1.0425001874999893 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1736 2.50228643329 0 98.335499625 2.031750562500001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1737 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1738 2.50228643329 0 99.402999975 0.6870000249999976 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1739 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1740 3.85964385182 0 99.43049945 0.6595005499999985 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1741 6.30106886729 0 99.4500003 0.6399996999999985 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1742 2.01610051566 0 99.040001 0.974998499999991 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1743 3.92040413524 0 99.53550005 0.5544999500000017 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1744 6.30106886729 0 99.503999575 0.5860004250000032 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf1745 3.38717868509 0 99.5845001 0.5054998999999981 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt
deleted file mode 100644
index 822d30a5319da50d56e3411929f35186d3fc2de9..0000000000000000000000000000000000000000
--- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/autotuner_data/tuner_promise_confs_batch220_single.txt
+++ /dev/null
@@ -1,5888 +0,0 @@
-+++++ -conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 1 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp32 1 add fp32 1 tanh fp32 1 -4 gpu mul fp32 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf1 2.47778695782 0 99.4405011 0.6494988999999919 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf2 6.7963162944 0 99.247499625 0.6637505624999918 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf3 3.77195447337 0 99.475500875 0.6144991250000033 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf4 3.71656038268 0 99.55999965 0.5300003500000031 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf5 4.4071692756 0 99.52149975 0.5685002499999939 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf6 6.14799414721 0 99.5005001 0.5894999000000013 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf7 2.57685599488 0 99.380999175 0.4635012374999974 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf8 3.13161472572 0 99.57700015 0.5129998499999943 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf9 2.00016617632 0 99.68 0.4099999999999909 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf10 2.57685599488 0 99.097999925 0.8880001125000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf11 3.38717868509 0 99.550999225 0.5390007749999995 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf12 3.84474688915 0 99.72650005 0.36349994999999924 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1
tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf13 4.48527898013 0 99.467000825 0.6229991749999982 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf14 3.70186719231 0 99.72250035 0.3674996499999935 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf15 2.47778695782 0 98.99100065 1.048499024999991 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf16 4.62093815126 0 99.3364993 0.5302510499999968 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf17 3.09333654389 0 99.6080005 0.4819994999999949 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf18 2.55088214386 0 99.00499975 1.0275003750000025 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf19 3.97649090032 0 99.429000025 0.6609999750000043 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf20 5.92620561097 0 99.556000075 0.5339999249999977 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf21 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf22 5.22888975029 0 99.508999675 0.5810003249999994 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf23 3.13161472572 0 99.4715006 0.6184993999999989 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf24 5.98028404553 0 99.533499775 0.5565002250000021 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf25 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf26 3.84474688915 0 99.566499875 0.5235001249999925 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 
1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf27 6.7963162944 0 99.44900035 0.640999649999992 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf28 3.09333654389 0 99.5589995 0.531000499999999 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf29 3.77195447337 0 99.5525002 0.5374998000000005 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf30 5.33920664205 0 99.434000525 0.6559994749999959 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf31 6.61857279171 0 99.493500625 0.5964993750000019 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf32 2.55088214386 0 99.48100075 0.6089992499999909 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf33 6.14799414721 0 99.46299985 0.627000149999995 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf34 2.47778695782 0 99.255999775 0.6510003374999869 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf35 2.55088214386 0 99.405999975 0.6840000249999975 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf36 3.38717868509 0 99.47850045 0.6114995499999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf37 4.93072604433 0 99.52249925 0.5675007500000021 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf38 4.48527898013 0 99.4800001 0.6099998999999997 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf39 2.57685599488 0 99.172500625 0.7762490624999998 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf40 4.73066277039 0 99.453000075 0.6369999249999921 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 
promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf41 2.55088214386 0 99.25799985 0.6480002249999899 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf42 6.10789096832 0 99.576499825 0.5135001749999987 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf43 3.85964385182 0 99.4670006 0.622999399999992 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf44 2.44096937877 0 99.3809992 0.46350119999998896 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf45 2.57685599488 0 99.419500025 0.6704999749999928 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf46 1.99590274244 0 99.459999 0.6300010000000015 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf47 2.47778695782 0 99.38649995 0.45525007499999504 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf48 2.5439518228 0 99.1235002 0.8497497000000038 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf49 3.71656038268 0 99.572999425 0.5170005749999916 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf50 2.47778695782 0 99.4265002 0.6634997999999911 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf51 4.38652335485 0 99.335499625 0.531750562500001 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf52 2.47778695782 0 99.419999975 0.6700000250000017 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf53 2.50228643329 0 99.034499525 0.983250712499995 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 
------ -+++++ -conf54 3.97649090032 0 99.570999775 0.5190002249999935 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf55 5.33920664205 0 99.457999875 0.632000124999999 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf56 3.95967525105 0 99.33649965 0.5302505250000067 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf57 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf58 2.47778695782 0 99.3020012 0.5819981999999868 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf59 5.92620561097 0 99.4655009 0.6244991000000027 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf60 4.64385542353 0 99.468000075 0.6219999249999916 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf61 2.57685599488 0 99.355498525 0.5017522124999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf62 4.73066277039 0 99.4225003 0.6674997000000019 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf63 1.99590274244 0 99.639999 0.45000099999999466 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf64 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf65 3.38717868509 0 99.333499175 0.5347512374999965 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf66 5.92620561097 0 99.476000125 0.613999874999999 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf67 2.81322619695 0 99.580499825 0.5095001749999938 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh 
fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf68 2.78229733114 0 99.52199845 0.5680015499999996 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf69 3.34244261096 0 99.6135014 0.47649859999999367 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf70 3.97649090032 0 99.51250005 0.5774999499999979 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf71 6.61857279171 0 99.5040001 0.5859998999999988 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf72 6.61857279171 0 99.420500525 0.6694994750000035 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf73 6.61857279171 0 99.504999975 0.5850000249999937 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf74 3.63433700317 0 99.652501475 0.43749852500000375 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf75 6.10789096832 0 99.3409999 0.5235001499999967 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf76 4.4071692756 0 99.478000575 0.6119994250000019 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf77 2.50228643329 0 99.40099985 0.6890001499999926 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf78 3.77195447337 0 99.3255004 0.546749400000003 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf79 3.80166404425 0 99.565500025 0.524499974999992 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf80 6.57211871555 0 99.333000125 0.5354998124999995 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf81 2.50228643329 0 99.47350135 0.6164986499999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 
-5 gpu softmax fp16 1 ------ -+++++ -conf82 5.79060658268 0 99.519000125 0.5709998749999926 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf83 2.50228643329 0 99.381999275 0.46200108749999913 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf84 2.47778695782 0 99.1835005 0.7597492500000058 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf85 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf86 2.55088214386 0 99.39199985 0.6980001499999929 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf87 3.85964385182 0 99.4795001 0.6104999000000021 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf88 4.38652335485 0 99.41799955 0.6720004499999931 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf89 4.93072604433 0 99.51499965 0.5750003500000048 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf90 3.13161472572 0 99.574500425 0.5154995750000012 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf91 3.09333654389 0 99.6145003 0.47549969999999464 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf92 3.92040413524 0 99.578000175 0.5119998249999981 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf93 5.33920664205 0 99.44499995 0.6450000500000016 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf94 2.47778695782 0 99.45300095 0.6369990499999943 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf95 2.50228643329 0 99.36299905 0.49050142499999794 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 
pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf96 5.02870270579 0 99.458500175 0.6314998250000002 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf97 5.75501684906 0 99.346999025 0.5145014624999931 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf98 2.57685599488 0 99.436500375 0.6534996250000035 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf99 3.33055390722 0 99.4209998 0.6690001999999936 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf100 5.59344058403 0 99.55649965 0.5335003499999914 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf101 5.79060658268 0 99.556999575 0.5330004249999917 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf102 3.7862916372 0 99.3334994 0.5347509000000059 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf103 6.30106886729 0 99.5175 0.5724999999999995 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf104 5.06758777035 0 99.552499325 0.5375006749999983 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf105 6.14799414721 0 99.499999525 0.5900004749999909 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf106 2.51187737029 0 99.4014999 0.6885000999999932 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf107 2.55088214386 0 99.39649945 0.6935005500000045 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf108 2.47124761202 0 99.1929994 0.745500899999989 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf109 4.03997047176 0 99.52149955 0.5685004499999963 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 
7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf110 3.70186719231 0 99.42099935 0.6690006499999953 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf111 2.47778695782 0 99.404999575 0.6850004249999927 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf112 3.97649090032 0 99.4600001 0.6299998999999957 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf113 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf114 3.86059861244 0 99.50650005 0.5834999499999981 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf115 2.57685599488 0 99.460000575 0.6299994250000026 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf116 5.33920664205 0 99.2685007 0.6322489499999904 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf117 5.02870270579 0 99.468000425 0.6219995749999981 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf118 4.4071692756 0 99.56599955 0.524000449999997 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf119 5.02870270579 0 99.4325 0.6574999999999932 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf120 6.61857279171 0 99.457500525 0.6324994749999974 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf121 2.00016617632 0 99.080002 0.9149970000000067 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf122 2.50228643329 0 99.4199998 0.6700001999999984 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf123 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 
1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf124 6.16535217595 0 99.58050025 0.5094997499999977 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf125 6.30106886729 0 99.45700045 0.6329995500000024 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf126 2.57685599488 0 99.407999525 0.6820004750000038 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf127 3.77195447337 0 99.6320011 0.4579989000000012 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf128 6.57211871555 0 99.3999996 0.6900003999999967 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf129 6.36224047437 0 99.264500075 0.6382498874999953 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf130 4.48527898013 0 99.52299915 0.5670008499999938 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf131 5.79060658268 0 99.416500475 0.6734995249999912 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf132 2.5439518228 0 99.32149995 0.5527500749999916 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf133 2.55088214386 0 99.4905002 0.5994997999999981 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf134 3.95967525105 0 99.4149999 0.6750000999999998 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf135 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf136 3.80166404425 0 99.421499725 0.6685002749999939 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf137 3.7862916372 0 99.419999575 
0.6700004249999921 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf138 3.77195447337 0 99.570499675 0.5195003250000042 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf139 3.86059861244 0 99.6145001 0.475499899999997 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf140 2.78229733114 0 99.609500425 0.4804995750000046 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf141 5.02870270579 0 99.446999675 0.643000324999997 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf142 3.77195447337 0 99.540499625 0.5495003750000024 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf143 4.90489779833 0 99.588500425 0.5014995749999912 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf144 5.75501684906 0 99.570999975 0.5190000249999912 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf145 2.5439518228 0 99.37649915 0.4702512749999954 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf146 6.16535217595 0 99.336499425 0.5302508624999973 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf147 5.33920664205 0 99.46600045 0.6239995500000021 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf148 2.47124761202 0 99.125500425 0.8467493624999989 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf149 4.03997047176 0 99.44900065 0.6409993499999956 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf150 4.48527898013 0 99.268500375 0.6322494374999934 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf151 3.08315119118 0 99.7369988 0.3530012000000028 -1 gpu conv 
samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf152 2.55088214386 0 99.406000225 0.6839997749999981 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf153 6.36224047437 0 99.46550055 0.6244994499999962 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf154 4.73066277039 0 99.46849975 0.6215002499999912 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf155 3.38717868509 0 99.5730001 0.5169998999999962 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf156 6.10789096832 0 99.4119995 0.6780005000000046 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf157 3.08315119118 0 99.426000325 0.6639996749999938 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf158 4.93072604433 0 99.431000325 0.6589996749999983 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf159 5.79060658268 0 99.47600065 0.6139993499999946 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf160 6.36224047437 0 99.54099985 0.549000149999992 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf161 2.50228643329 0 99.3779992 0.4680011999999891 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf162 6.7963162944 0 99.4620004 0.627999600000004 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf163 2.57685599488 0 99.37699935 0.46950097499998833 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf164 6.20621598565 0 99.471000525 0.618999475000004 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf165 6.30106886729 0 99.439 0.6510000000000048 -1 gpu conv perf 26 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf166 6.36224047437 0 99.440499925 0.6495000750000003 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf167 4.29202279061 0 99.5565002 0.5334997999999956 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf168 2.57685599488 0 98.9975002 1.0387496999999897 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf169 3.85964385182 0 99.483499875 0.606500124999991 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf170 2.50228643329 0 99.2815005 0.612749249999986 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf171 6.36224047437 0 99.457000375 0.6329996249999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf172 6.20621598565 0 99.55249975 0.5375002500000022 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf173 3.33055390722 0 99.583999875 0.5060001249999942 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf174 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf175 5.75501684906 0 99.41749995 0.6725000499999908 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf176 6.20621598565 0 99.503500175 0.5864998249999985 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf177 3.85964385182 0 99.4360005 0.653999499999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf178 2.47778695782 0 99.3329996 0.535500600000006 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf179 2.55088214386 0 99.409999625 0.6800003750000002 -1 gpu conv perf 23 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf180 2.57685599488 0 99.010999225 1.0185011624999873 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf181 4.64385542353 0 99.567499925 0.5225000749999907 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf182 2.47778695782 0 99.4029989 0.6870010999999977 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf183 3.38717868509 0 99.57399985 0.5160001499999908 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf184 2.77405457184 0 99.4234996 0.6665003999999982 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf185 2.50228643329 0 99.379499125 0.4657513125000037 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf186 6.14799414721 0 99.4990005 0.5909995000000038 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf187 6.7963162944 0 99.43399975 0.6560002499999996 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf188 5.33920664205 0 99.51449955 0.5755004500000013 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf189 6.30106886729 0 99.266500125 0.6352498125000068 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf190 4.93072604433 0 99.56199985 0.5280001499999912 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf191 4.73066277039 0 99.51799975 0.5720002499999964 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf192 2.57685599488 0 99.285000425 0.6074993624999863 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf193 2.5439518228 0 99.1979999 
0.7380001499999977 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf194 5.92620561097 0 99.44250075 0.6474992500000042 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf195 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf196 2.47778695782 0 99.0264997 0.9952504499999932 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf197 4.48527898013 0 99.449500675 0.6404993250000018 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf198 4.64385542353 0 99.5164996 0.5735003999999947 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf199 5.19985255986 0 99.3364995 0.5302507499999933 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf200 3.34244261096 0 99.608000925 0.48199907499999883 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf201 4.73066277039 0 99.265000525 0.6374992124999892 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf202 3.97649090032 0 99.51849965 0.5715003500000023 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf203 3.97649090032 0 99.52049955 0.569500450000001 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf204 5.22888975029 0 99.508000225 0.5819997749999942 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf205 4.90489779833 0 99.42149985 0.6685001499999942 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf206 2.5439518228 0 99.492999825 0.5970001749999995 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf207 4.03997047176 0 99.46600075 
0.6239992499999915 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf208 2.81322619695 0 99.589000375 0.5009996250000001 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf209 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf210 5.22888975029 0 99.5105 0.5795000000000045 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf211 2.55088214386 0 99.42599955 0.6640004499999975 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf212 2.81322619695 0 99.542999675 0.5470003249999934 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf213 2.47778695782 0 99.05800035 0.947999474999996 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf214 2.55088214386 0 99.39599955 0.6940004499999987 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf215 4.93072604433 0 99.472500675 0.6174993249999915 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf216 5.02870270579 0 99.270500225 0.629249662499987 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf217 3.86059861244 0 99.590000025 0.4999999750000029 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf218 3.88250959671 0 99.55349975 0.5365002499999975 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf219 2.50228643329 0 99.365499425 0.48675086250000277 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf220 2.55088214386 0 98.99799995 1.0380000750000065 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf221 3.80166404425 0 99.53449915 0.5555008499999957 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf222 2.00016617632 0 99.239998 0.6750029999999967 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf223 2.00016617632 0 99.519997 0.5700029999999942 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf224 2.51187737029 0 99.024499825 0.9982502624999867 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf225 3.86059861244 0 99.57049935 0.519500649999992 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf226 2.47778695782 0 99.4985004 0.5914996000000002 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf227 3.71656038268 0 99.508999525 0.5810004750000047 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf228 3.71656038268 0 99.607000725 0.48299927499999173 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf229 2.47778695782 0 99.324499325 0.5482510124999891 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf230 6.14799414721 0 99.5609996 0.5290003999999954 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf231 3.77195447337 0 99.5870001 0.5029999000000004 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf232 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf233 3.92040413524 0 99.545499775 0.5445002250000016 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf234 
3.92040413524 0 99.57699935 0.5130006500000036 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf235 2.55088214386 0 98.987999875 1.0530001874999968 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf236 2.57685599488 0 99.363999075 0.48900138750000366 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf237 4.48527898013 0 99.4369996 0.6530004000000048 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf238 5.79060658268 0 99.5149995 0.5750004999999959 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf239 2.81322619695 0 99.463000675 0.6269993249999942 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf240 4.64385542353 0 99.523000025 0.566999974999996 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf241 2.81322619695 0 99.33849865 0.5272520249999886 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf242 3.95967525105 0 99.6014996 0.48850040000000094 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf243 6.36224047437 0 99.4400002 0.6499997999999977 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf244 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf245 2.78229733114 0 99.57849985 0.5115001499999977 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf246 3.92040413524 0 99.554999725 0.5350002749999959 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf247 2.50228643329 0 98.990500375 1.049249437500002 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf248 2.57685599488 0 99.272500225 0.626249662499994 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf249 2.00016617632 0 99.339996 0.5250059999999976 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf250 2.57685599488 0 99.4725001 0.6174998999999929 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf251 2.55088214386 0 99.3259999 0.5460001499999976 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf252 2.73595882486 0 99.64500115 0.44499884999999895 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf253 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf254 6.14799414721 0 99.418500325 0.6714996750000012 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf255 3.92040413524 0 99.3274997 0.5437504499999903 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf256 5.92620561097 0 99.256500075 0.6502498874999887 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf257 2.47778695782 0 99.4835009 0.6064991000000021 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf258 2.57685599488 0 99.441500975 0.648499025000001 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf259 3.86059861244 0 99.615001125 0.4749988749999915 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf260 2.57685599488 0 99.02799945 0.9930008250000029 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ 
-conf261 3.85964385182 0 99.522499925 0.5675000749999924 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf262 6.61857279171 0 99.545499925 0.5445000749999963 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf263 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf264 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf265 4.90489779833 0 99.343498975 0.5197515374999924 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf266 2.55088214386 0 99.449500125 0.6404998749999976 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf267 3.38717868509 0 99.56699955 0.5230004499999922 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf268 6.16535217595 0 99.40699995 0.6830000499999983 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf269 3.13161472572 0 99.577999975 0.5120000250000004 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf270 5.02870270579 0 99.5194997 0.5705003000000005 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf271 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf272 4.03997047176 0 99.270000125 0.629999812500003 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf273 2.50228643329 0 99.436000225 0.653999774999997 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf274 6.30106886729 0 99.4565001 0.6334998999999982 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise 
swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf275 4.64385542353 0 99.520499675 0.5695003250000014 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf276 6.20621598565 0 99.51049995 0.5795000500000015 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf277 5.22888975029 0 99.561499175 0.5285008250000033 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf278 2.50228643329 0 99.100999975 0.8835000375000064 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf279 6.20621598565 0 99.50749955 0.5825004499999921 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf280 2.50228643329 0 99.408499875 0.6815001249999938 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf281 2.44096937877 0 99.442000325 0.6479996750000027 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf282 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf283 4.78704248134 0 99.559999825 0.5300001749999922 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf284 6.7963162944 0 99.50250015 0.5874998499999947 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf285 4.93072604433 0 99.514499925 0.5755000750000022 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf286 2.50228643329 0 99.4350005 0.6549994999999967 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf287 4.51618813067 0 99.55749945 0.5325005500000032 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf288 5.22888975029 0 99.41699995 0.6730000499999932 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise 
swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf289 4.4071692756 0 99.51849985 0.57150015 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf290 2.5439518228 0 99.19599945 0.7410008249999933 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf291 3.85964385182 0 99.27150005 0.6277499249999963 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf292 3.86059861244 0 99.5890004 0.5009995999999944 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf293 2.57685599488 0 99.377499425 0.4687508625000021 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf294 3.71656038268 0 99.59550035 0.49449965000000307 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf295 2.57685599488 0 99.360999575 0.4935006375000057 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf296 5.19985255986 0 99.40499985 0.6850001500000019 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf297 4.03997047176 0 99.44049995 0.6495000499999947 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf298 2.5439518228 0 99.282001325 0.6119980125000026 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf299 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf300 2.47778695782 0 98.968000625 1.0829990624999937 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf301 3.71656038268 0 99.624000875 0.46599912500000473 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf302 3.70186719231 0 
99.567000175 0.5229998249999938 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf303 5.79060658268 0 99.507000675 0.5829993249999973 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf304 4.03997047176 0 99.4725003 0.6174997000000048 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf305 6.20621598565 0 99.42449955 0.6655004500000047 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf306 2.55088214386 0 99.171000825 0.778498762500007 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf307 3.71567552873 0 99.566999925 0.5230000749999931 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf308 2.00016617632 0 99.599998 0.4900019999999984 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf309 2.51187737029 0 99.449501 0.6404989999999998 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf310 2.47778695782 0 99.409999625 0.6800003750000002 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf311 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf312 3.92040413524 0 99.4805003 0.609499699999995 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf313 3.7862916372 0 99.593500375 0.4964996249999928 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf314 2.5439518228 0 99.2214994 0.7027508999999981 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf315 2.47778695782 0 98.999999825 1.0350002624999917 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 
-3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf316 2.00016617632 0 99.360001 0.49499850000000123 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf317 3.13161472572 0 99.53799935 0.5520006499999909 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf318 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf319 2.5439518228 0 99.353998375 0.5040024374999916 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf320 2.00016617632 0 99.019997 1.0050044999999912 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf321 5.92620561097 0 99.510499825 0.5795001750000012 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf322 6.7963162944 0 99.430000375 0.6599996249999919 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf323 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf324 2.50228643329 0 99.288500125 0.6022498124999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf325 3.13161472572 0 99.335998525 0.531002212500006 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf326 6.36224047437 0 99.513500075 0.5764999250000017 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf327 2.55088214386 0 99.386498775 0.4552518374999863 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf328 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 
gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf329 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf330 6.57211871555 0 99.56999995 0.5200000500000016 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf331 5.92620561097 0 99.4315001 0.6584999000000039 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf332 6.30106886729 0 99.432499925 0.6575000749999959 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf333 2.55088214386 0 99.500499575 0.5895004249999914 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf334 6.57211871555 0 99.3380004 0.5279993999999988 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf335 6.30106886729 0 99.4234996 0.6665003999999982 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf336 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf337 2.47778695782 0 99.0089999 1.0215001499999872 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf338 4.73066277039 0 99.511499475 0.578500525000004 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf339 5.92620561097 0 99.42850015 0.6614998499999928 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf340 6.10789096832 0 99.337999325 0.528001012499999 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf341 2.00016617632 0 99.639999 0.45000099999999466 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf342 3.77195447337 0 99.642001075 0.4479989250000017 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf343 5.98028404553 0 99.551499675 0.5385003249999954 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf344 2.47778695782 0 99.443499975 0.6465000250000031 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf345 4.62093815126 0 99.58099945 0.5090005500000047 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf346 3.80166404425 0 99.565000275 0.524999724999995 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf347 2.50228643329 0 99.4395002 0.6504998000000001 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf348 2.00016617632 0 98.419998 1.9050029999999865 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf349 2.5439518228 0 99.21349975 0.7147503750000013 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf350 6.57211871555 0 99.5695004 0.5204996000000023 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf351 2.47778695782 0 99.02399955 0.9990006749999978 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf352 2.81322619695 0 99.337498225 0.5287526624999899 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf353 4.48527898013 0 99.4734996 0.6165004000000011 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf354 2.78229733114 0 99.551499625 0.5385003749999925 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf355 4.93072604433 0 99.473499625 0.6165003749999954 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf356 3.84474688915 0 99.427999575 
0.6620004249999966 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf357 3.95967525105 0 99.333499625 0.534750562499994 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf358 3.80166404425 0 99.4210003 0.6689996999999949 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf359 6.30106886729 0 99.4224998 0.6675002000000007 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf360 3.7862916372 0 99.329999175 0.5400012375000003 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf361 2.81322619695 0 99.46900045 0.620999550000002 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf362 4.38652335485 0 99.335999375 0.5310009374999964 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf363 2.55088214386 0 99.264999825 0.6375002624999908 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf364 6.61857279171 0 99.55200015 0.53799985 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf365 5.79060658268 0 99.471500275 0.6184997250000009 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf366 2.5439518228 0 99.48699985 0.6030001499999941 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf367 3.77195447337 0 99.55600025 0.533999750000001 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf368 6.36224047437 0 99.253000075 0.6554998874999924 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf369 5.02870270579 0 99.4630002 0.6269998000000016 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf370 6.7963162944 0 99.447499725 0.6425002749999976 -1 gpu conv perf 22 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf371 6.10789096832 0 99.574500175 0.5154998250000006 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf372 2.50228643329 0 99.382000175 0.461999737499994 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf373 3.34244261096 0 99.613501175 0.4764988250000016 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf374 6.14799414721 0 99.4234998 0.6665001999999959 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf375 2.5439518228 0 98.316499775 2.060250337500001 -1 gpu conv samp 35 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf376 3.92040413524 0 99.54199925 0.5480007499999943 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf377 5.22888975029 0 99.508000375 0.5819996250000031 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf378 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf379 4.03997047176 0 99.4290005 0.660999499999997 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf380 3.77195447337 0 99.579999825 0.5100001749999962 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf381 3.85964385182 0 99.46100045 0.6289995499999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf382 4.4071692756 0 99.519999425 0.5700005750000031 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf383 3.63433700317 0 99.644001375 0.44599862499999576 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf384 6.61857279171 0 99.49999985 0.590000150000003 -1 gpu conv perf 24 add 
fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf385 2.5439518228 0 99.35599865 0.5010020249999911 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf386 6.61857279171 0 99.503999925 0.5860000749999955 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf387 6.16535217595 0 99.412499275 0.6775007249999959 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf388 3.97649090032 0 99.555499475 0.5345005249999929 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf389 5.33920664205 0 99.46450045 0.625499549999995 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf390 2.47124761202 0 99.383499 0.45975149999999587 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf391 3.77195447337 0 99.335499475 0.5317507874999876 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf392 6.61857279171 0 99.460500025 0.629499974999996 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf393 6.36224047437 0 99.441499775 0.6485002250000008 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf394 5.59344058403 0 99.55299965 0.5370003499999939 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf395 6.36224047437 0 99.5040001 0.5859998999999988 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf396 3.08315119118 0 99.56949995 0.520500050000004 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf397 4.93072604433 0 99.556999875 0.5330001249999953 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf398 2.50228643329 0 99.284500025 0.6082499624999969 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 
pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf399 2.47124761202 0 99.31950005 0.5557499249999935 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf400 6.10789096832 0 99.412999875 0.6770001250000007 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf401 2.5439518228 0 99.284500375 0.6082494375000067 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf402 2.55088214386 0 98.27499985 2.122500224999996 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf403 3.34244261096 0 99.51049965 0.5795003499999979 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf404 5.22888975029 0 99.512500025 0.5774999750000035 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf405 5.92620561097 0 99.272500625 0.626249062499987 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf406 2.55088214386 0 99.416999825 0.6730001749999929 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf407 2.50228643329 0 98.888500375 1.2022494374999866 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf408 3.97649090032 0 99.417999375 0.672000625000004 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf409 3.85964385182 0 99.437500625 0.6524993749999993 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf410 2.50228643329 0 99.46800095 0.6219990499999938 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf411 4.93072604433 0 99.516499975 0.5735000249999956 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf412 4.64385542353 0 99.558999325 
0.5310006749999957 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf413 2.50228643329 0 99.366999125 0.4845013124999866 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf414 4.48527898013 0 99.27900015 0.6164997749999941 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf415 5.75501684906 0 99.582499775 0.5075002249999955 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf416 6.14799414721 0 99.503999875 0.5860001249999925 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf417 4.73066277039 0 99.42850035 0.6614996500000047 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf418 5.79060658268 0 99.5514997 0.538500300000004 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf419 2.50228643329 0 98.90149975 1.1827503749999977 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf420 1.99590274244 0 99.440002 0.6499979999999909 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf421 2.5439518228 0 99.380999675 0.4635004874999993 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf422 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf423 2.55088214386 0 99.040499575 0.9742506374999991 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf424 2.5439518228 0 99.119001075 0.8564983874999967 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf425 6.30106886729 0 99.4704999 0.6195001000000048 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul 
fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf426 3.97649090032 0 99.463000875 0.6269991249999919 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf427 5.02870270579 0 99.279499875 0.615750187499998 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf428 3.86059861244 0 99.55900015 0.5309998499999949 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf429 2.55088214386 0 99.40249965 0.687500350000002 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf430 2.47778695782 0 99.033499525 0.9847507125000021 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf431 1.99590274244 0 99.400002 0.6899979999999971 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf432 2.81322619695 0 99.5774999 0.5125000999999912 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf433 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf434 3.38717868509 0 99.569500275 0.520499725000002 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf435 2.55088214386 0 98.984499925 1.058250112500005 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf436 6.57211871555 0 99.41049985 0.6795001500000041 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf437 2.57685599488 0 98.3014998 2.0827502999999936 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf438 3.92040413524 0 99.48750085 0.6024991499999942 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf439 4.73066277039 0 99.268000275 
0.6329995874999881 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf440 4.90489779833 0 99.42150015 0.6684998499999978 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf441 2.5439518228 0 99.314000025 0.5639999624999987 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf442 2.51187737029 0 99.0054997 1.0267504499999944 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf443 6.14799414721 0 99.55700005 0.5329999499999986 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf444 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf445 2.57685599488 0 99.2825003 0.6112495500000037 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf446 2.47124761202 0 98.8115005 1.3177492500000056 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf447 2.78229733114 0 99.60900025 0.4809997500000037 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf448 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf449 2.57685599488 0 99.0224999 1.001250149999997 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf450 3.97649090032 0 99.51849985 0.57150015 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf451 3.80166404425 0 99.481500675 0.6084993249999912 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf452 2.47778695782 0 99.476501075 0.6134989249999961 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 
-3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf453 2.57685599488 0 99.40400015 0.6859998499999961 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf454 6.16535217595 0 99.57800005 0.5119999499999978 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf455 6.7963162944 0 99.43149955 0.6585004499999997 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf456 6.16535217595 0 99.3334992 0.5347511999999881 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf457 2.57685599488 0 98.3189993 2.0565010499999943 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf458 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf459 2.51187737029 0 99.4385002 0.6514998000000048 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf460 4.48527898013 0 99.527499525 0.5625004750000017 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf461 6.20621598565 0 99.499999475 0.5900005250000021 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf462 2.5439518228 0 98.809000625 1.3214990625000027 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf463 4.90489779833 0 99.3339999 0.5340001500000042 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf464 3.84474688915 0 99.57100005 0.5189999500000028 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf465 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf466 6.7963162944 0 99.2584997 0.6472504499999943 -1 gpu 
conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf467 6.61857279171 0 99.43000025 0.6599997499999916 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf468 5.22888975029 0 99.51450005 0.5754999500000025 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf469 2.57685599488 0 99.277500725 0.6187489125000027 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf470 2.55088214386 0 98.95650025 1.1002496249999894 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf471 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf472 5.02870270579 0 99.47649985 0.6135001500000016 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf473 6.20621598565 0 99.456000575 0.6339994249999933 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf474 5.79060658268 0 99.4119998 0.678000199999994 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf475 6.20621598565 0 99.4235003 0.6664996999999971 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf476 2.00016617632 0 99.559998 0.5300020000000046 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf477 2.50228643329 0 99.391498725 0.6985012749999925 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf478 2.55088214386 0 99.321500125 0.5527498124999966 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf479 3.92040413524 0 99.574499775 0.515500224999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax 
fp16 1 ------ -+++++ -conf480 6.7963162944 0 99.50149975 0.5885002500000042 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf481 2.78229733114 0 99.61500085 0.4749991499999965 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf482 4.64385542353 0 99.51899995 0.5710000500000035 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf483 3.86059861244 0 99.604000675 0.4859993250000031 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf484 5.19985255986 0 99.580500375 0.509499624999998 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf485 2.47124761202 0 99.2139993 0.7140010500000002 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf486 2.50228643329 0 98.319499875 2.0557501874999886 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf487 2.57685599488 0 99.026999275 0.9945010875000051 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf488 3.85964385182 0 99.267500325 0.6337495124999961 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf489 1.99590274244 0 98.940002 1.1249969999999863 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf490 2.57685599488 0 99.3614989 0.4927516499999953 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf491 2.47778695782 0 98.26250005 2.141249924999997 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf492 3.95967525105 0 99.5879999 0.502000099999998 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf493 3.92040413524 0 99.561499525 0.5285004749999956 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 
3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf494 4.93072604433 0 99.5169992 0.573000799999997 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf495 2.57685599488 0 98.72399965 1.4490005250000024 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf496 4.03997047176 0 99.26750035 0.6337494749999877 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf497 2.00016617632 0 99.379997 0.46500449999999205 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf498 2.5439518228 0 99.20499985 0.7275002250000071 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf499 3.84474688915 0 99.72549965 0.3645003499999945 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf500 4.51618813067 0 99.5604993 0.5295006999999942 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf501 3.95967525105 0 99.411999625 0.6780003749999907 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf502 5.33920664205 0 99.450000925 0.6399990750000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf503 3.92040413524 0 99.562499325 0.5275006749999932 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf504 2.55088214386 0 99.474500675 0.6154993249999962 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf505 6.30106886729 0 99.268999325 0.6315010125000029 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf506 3.86059861244 0 99.612500925 0.47749907499999156 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf507 3.86059861244 0 99.50949945 0.5805005499999908 -1 gpu conv perf 28 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf508 5.33920664205 0 99.26700055 0.6344991749999878 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf509 2.55088214386 0 98.9395005 1.1257492500000055 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf510 2.57685599488 0 99.3634994 0.4897509000000042 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf511 2.47778695782 0 98.987000525 1.054499212499998 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf512 6.30106886729 0 99.5064999 0.5835001000000034 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf513 4.03997047176 0 99.463000375 0.6269996250000048 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf514 6.14799414721 0 99.493500275 0.5964997249999954 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf515 6.20621598565 0 99.51799935 0.5720006500000011 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf516 2.50228643329 0 98.747499825 1.4137502624999883 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf517 5.06758777035 0 99.539499625 0.5505003749999929 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf518 5.33920664205 0 99.474000775 0.6159992250000045 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf519 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf520 2.47778695782 0 99.42150005 0.6684999499999918 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf521 
3.38717868509 0 99.3224999 0.5512501500000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf522 4.62093815126 0 99.338999375 0.5265009374999963 -1 gpu conv samp 34 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf523 2.57685599488 0 99.097500525 0.8887492124999952 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf524 2.50228643329 0 99.008999525 1.021500712500007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf525 2.47778695782 0 99.1829998 0.7605002999999897 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf526 5.92620561097 0 99.543499475 0.5465005249999934 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf527 6.61857279171 0 99.49699995 0.5930000499999949 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf528 3.80166404425 0 99.515499425 0.5745005749999962 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf529 2.47778695782 0 99.042499675 0.9712504874999937 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf530 2.50228643329 0 99.37299885 0.47550172499999377 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf531 4.64385542353 0 99.424999925 0.6650000750000032 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf532 2.50228643329 0 99.372999525 0.4755007125000006 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf533 1.99590274244 0 98.599998 1.6350029999999975 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf534 5.02870270579 0 99.449499375 0.6405006249999957 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 
promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf535 3.88250959671 0 99.554000025 0.5359999750000043 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf536 3.08315119118 0 99.72599995 0.3640000499999957 -1 gpu conv samp 33 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf537 4.78704248134 0 99.564499975 0.5255000249999938 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf538 2.50228643329 0 99.092500375 0.8962494374999963 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf539 6.36224047437 0 99.5414994 0.5485005999999913 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf540 2.55088214386 0 99.173000725 0.7754989125000051 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf541 2.55088214386 0 98.207499625 2.223750562500001 -1 gpu conv perf 23 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf542 2.5439518228 0 99.0289995 0.9915007500000002 -1 gpu conv samp 36 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf543 3.85964385182 0 99.462500375 0.627499624999993 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf544 5.79060658268 0 99.508000025 0.5819999749999966 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf545 4.93072604433 0 99.42049995 0.6695000499999907 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf546 4.29202279061 0 99.5639995 0.5260005000000035 -1 gpu conv fp16 1 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf547 2.55088214386 0 99.298499575 0.5872506375000057 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf548 3.86059861244 0 99.570500025 0.5194999749999966 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 
promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf549 6.7963162944 0 99.459000275 0.6309997250000038 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf550 6.20621598565 0 99.5074999 0.5825000999999986 -1 gpu conv perf 25 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf551 6.14799414721 0 99.508500275 0.5814997249999948 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf552 6.7963162944 0 99.42750015 0.6624998499999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf553 6.20621598565 0 99.5584997 0.531500299999999 -1 gpu conv perf 29 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf554 2.55088214386 0 99.388499675 0.4522504874999882 -1 gpu conv perf 24 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf555 3.92040413524 0 99.324499525 0.5482507125000069 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf556 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf557 3.86059861244 0 99.583500325 0.5064996749999949 -1 gpu conv perf 30 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf558 2.78229733114 0 99.51749825 0.572501749999995 -1 gpu conv perf 28 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf559 4.48527898013 0 99.4650002 0.624999799999992 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf560 3.13161472572 0 99.329499575 0.5407506374999969 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf561 2.57685599488 0 99.369498875 0.4807516874999891 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf562 2.81322619695 0 99.597000425 0.49299957499999325 -1 gpu conv perf 21 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf563 5.33920664205 0 99.4605002 0.6294997999999993 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf564 6.7963162944 0 99.454500075 0.6354999249999992 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf565 4.73066277039 0 99.520499175 0.5695008250000001 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf566 2.50228643329 0 97.89149975 2.6977503750000054 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf567 2.50228643329 0 99.444000275 0.6459997250000044 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf568 2.57685599488 0 98.99899945 1.0365008249999974 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf569 2.01610051566 0 99.0 1.0349999999999966 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf570 3.92040413524 0 99.573499575 0.5165004249999982 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf571 6.36224047437 0 99.441499825 0.6485001750000038 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf572 3.77195447337 0 99.31999955 0.5550006749999881 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf573 2.01610051566 0 99.519997 0.5700029999999942 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf574 2.01610051566 0 98.0 2.5349999999999966 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf575 2.50228643329 0 99.37149915 0.4777512749999886 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu 
mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf576 2.50228643329 0 99.2860005 0.6059992499999964 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf577 2.50228643329 0 99.44100045 0.6489995499999935 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf578 2.81322619695 0 99.469000975 0.6209990249999976 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf579 6.36224047437 0 99.4460002 0.6439997999999975 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf580 2.01610051566 0 99.099998 0.8850029999999975 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf581 2.01610051566 0 99.32 0.5550000000000068 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf582 3.38717868509 0 99.5824997 0.5075002999999981 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf583 3.85964385182 0 99.47850095 0.6114990500000005 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf584 5.33920664205 0 99.473500525 0.616499474999992 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf585 2.57685599488 0 99.4610008 0.6289992000000041 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf586 2.50228643329 0 97.649001025 3.0614984624999906 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf587 2.50228643329 0 98.9994999 1.0357501499999913 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf588 3.38717868509 0 99.585500725 0.5044992749999949 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf589 2.01610051566 0 
97.760002 2.8949969999999965 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf590 2.01610051566 0 97.900002 2.6849969999999956 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf591 2.57685599488 0 99.0359988 0.9810017999999943 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf592 2.57685599488 0 99.439000625 0.6509993749999922 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf593 2.01610051566 0 97.620003 3.104995500000001 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf594 2.01610051566 0 99.620003 0.4699970000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf595 2.50228643329 0 98.7414999 1.422750150000006 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf596 2.01610051566 0 97.82 2.805000000000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf597 5.33920664205 0 99.43200015 0.6579998500000045 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf598 5.92620561097 0 99.445 0.6450000000000046 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf599 5.02870270579 0 99.4440004 0.6459996000000047 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf600 6.36224047437 0 99.470000675 0.6199993250000034 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf601 2.50228643329 0 99.41950015 0.6704998499999931 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf602 4.73066277039 0 99.446999975 0.6430000250000006 -1 gpu conv 
perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf603 4.48527898013 0 99.4805003 0.609499699999995 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf604 3.13161472572 0 99.57349995 0.5165000499999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf605 6.30106886729 0 99.4230005 0.6669994999999972 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf606 2.57685599488 0 99.472000525 0.6179994749999992 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf607 4.73066277039 0 99.472 0.6180000000000035 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf608 5.02870270579 0 99.520999475 0.5690005250000013 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf609 2.01610051566 0 99.400002 0.6899979999999971 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf610 4.73066277039 0 99.26550005 0.6367499249999966 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf611 2.01610051566 0 99.220001 0.7049985000000021 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf612 3.92040413524 0 99.5585006 0.5314993999999956 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf613 5.33920664205 0 99.257 0.6494999999999891 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf614 2.57685599488 0 99.019500225 1.0057496624999942 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf615 4.03997047176 0 99.473000425 0.6169995750000027 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf616 4.48527898013 0 99.4760001 
0.6139999000000046 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf617 5.02870270579 0 99.470500125 0.6194998749999968 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf618 5.02870270579 0 99.469500375 0.6204996250000022 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf619 2.57685599488 0 99.3729995 0.4755007499999877 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf620 6.7963162944 0 99.26299995 0.6405000750000056 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf621 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf622 2.57685599488 0 99.4455 0.6445000000000022 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf623 2.01610051566 0 99.580002 0.5099980000000045 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf624 2.57685599488 0 98.894499925 1.1932501124999888 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf625 6.30106886729 0 99.453500225 0.6364997749999987 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf626 2.81322619695 0 99.5449997 0.5450002999999924 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf627 2.50228643329 0 99.40899965 0.6810003499999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf628 6.30106886729 0 99.460500625 0.6294993750000032 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf629 2.57685599488 0 98.8919993 1.1970010500000043 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 
-4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf630 3.92040413524 0 99.545499225 0.5445007749999974 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf631 5.92620561097 0 99.266500125 0.6352498125000068 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf632 3.38717868509 0 99.3304993 0.5392510499999972 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf633 2.50228643329 0 99.37649925 0.4702511250000043 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf634 2.50228643329 0 99.18549985 0.7567502249999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf635 2.50228643329 0 97.9579998 2.5980003000000025 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf636 2.01610051566 0 98.480003 1.814995500000002 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf637 2.50228643329 0 99.386499425 0.4552508625000016 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf638 2.01610051566 0 98.839996 1.2750059999999976 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf639 5.02870270579 0 99.437000225 0.6529997749999922 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf640 2.81322619695 0 99.578500325 0.5114996750000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf641 2.50228643329 0 98.3239996 2.0490006000000065 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf642 6.30106886729 0 99.498499875 0.5915001250000046 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf643 
4.03997047176 0 99.4390005 0.6509994999999918 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf644 3.92040413524 0 99.316999875 0.5595001875000065 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf645 2.01610051566 0 99.480003 0.6099970000000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf646 2.50228643329 0 99.27950065 0.6157490249999924 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf647 2.01610051566 0 99.459999 0.6300010000000015 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf648 2.57685599488 0 97.878499025 2.717251462500002 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf649 6.36224047437 0 99.46100015 0.6289998499999939 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf650 2.01610051566 0 97.699997 2.9850045000000023 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf651 2.01610051566 0 99.540001 0.549998999999994 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf652 2.01610051566 0 99.599998 0.4900019999999984 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf653 2.01610051566 0 98.959999 1.0950015000000022 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf654 5.92620561097 0 99.4569999 0.6330000999999982 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf655 2.57685599488 0 99.266500325 0.6352495125000033 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf656 
3.13161472572 0 99.579499825 0.5105001749999986 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf657 2.01610051566 0 98.18 2.2649999999999864 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf658 2.57685599488 0 98.2215007 2.202748949999986 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf659 2.81322619695 0 99.590000575 0.49999942499999295 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf660 2.57685599488 0 97.950000075 2.6099998874999883 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf661 2.57685599488 0 99.175999825 0.7710002624999888 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf662 2.57685599488 0 99.3644998 0.48825029999999003 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf663 2.01610051566 0 99.559998 0.5300020000000046 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf664 2.57685599488 0 98.337999575 2.0280006375 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf665 2.57685599488 0 99.3804991 0.46425135000000495 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf666 6.7963162944 0 99.49299965 0.5970003499999962 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf667 5.92620561097 0 99.513500075 0.5764999250000017 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf668 4.48527898013 0 99.524999925 0.5650000749999947 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf669 3.85964385182 0 99.473000375 0.6169996249999997 -1 gpu conv perf 22 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf670 4.03997047176 0 99.4490007 0.6409992999999986 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf671 3.85964385182 0 99.53549905 0.5545009499999992 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf672 4.03997047176 0 99.47100035 0.6189996500000007 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf673 3.85964385182 0 99.4680003 0.6219996999999978 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf674 3.77195447337 0 99.56099965 0.5290003499999983 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf675 4.73066277039 0 99.45050015 0.6394998500000014 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf676 3.38717868509 0 99.545499325 0.5445006750000033 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf677 3.13161472572 0 99.4750006 0.6149993999999964 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf678 2.57685599488 0 97.62400035 3.0989994749999923 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf679 2.50228643329 0 99.47150095 0.6184990499999913 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf680 2.50228643329 0 98.297999825 2.088000262499989 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf681 2.57685599488 0 99.417500325 0.6724996749999917 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf682 2.01610051566 0 98.440002 1.8749969999999863 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf683 4.48527898013 
0 99.4639998 0.6260002000000014 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf684 2.50228643329 0 99.473501175 0.6164988250000022 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf685 2.50228643329 0 99.368 0.4830000000000041 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf686 2.57685599488 0 98.73399925 1.4340011250000018 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf687 3.85964385182 0 99.269000825 0.6314987624999873 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf688 5.33920664205 0 99.5154996 0.5745003999999995 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf689 3.38717868509 0 99.4760001 0.6139999000000046 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf690 6.30106886729 0 99.44300025 0.6469997500000005 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf691 4.73066277039 0 99.45850055 0.6314994500000012 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf692 3.77195447337 0 99.582999775 0.5070002249999931 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf693 2.50228643329 0 99.03049945 0.9892508250000063 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf694 2.01610051566 0 98.900002 1.1849969999999956 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf695 2.57685599488 0 99.3689987 0.48150194999998774 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf696 6.7963162944 0 99.43299975 0.6570002500000044 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add 
fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf697 3.38717868509 0 99.5629995 0.5270004999999941 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf698 4.48527898013 0 99.428500425 0.661499575000002 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf699 2.01610051566 0 99.080002 0.9149970000000067 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf700 2.50228643329 0 98.90549945 1.1767508250000063 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf701 2.81322619695 0 99.334498375 0.5332524375000034 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf702 4.03997047176 0 99.526499175 0.5635008249999999 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf703 2.01610051566 0 98.540001 1.724998499999991 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf704 3.85964385182 0 99.437500375 0.6524996249999987 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf705 5.92620561097 0 99.46749985 0.6225001500000019 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf706 3.77195447337 0 99.54699895 0.5430010499999952 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf707 2.01610051566 0 98.120003 2.354995500000001 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf708 6.30106886729 0 99.265 0.6374999999999957 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf709 2.01610051566 0 97.879997 2.715004499999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf710 3.13161472572 0 99.591500275 0.4984997249999964 -1 gpu conv perf 21 add fp16 
1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf711 6.36224047437 0 99.26650015 0.6352497749999984 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf712 3.13161472572 0 99.543499225 0.5465007749999927 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf713 2.01610051566 0 99.160004 0.7949939999999955 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf714 2.50228643329 0 99.097500225 0.8887496624999898 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf715 5.92620561097 0 99.471999375 0.618000625000002 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf716 2.01610051566 0 99.68 0.4099999999999909 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf717 2.50228643329 0 99.0050001 1.027499849999991 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf718 2.57685599488 0 98.3044998 2.0782502999999934 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf719 4.03997047176 0 99.2690006 0.6314990999999992 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 7 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf720 3.13161472572 0 99.339998875 0.5250016874999872 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf721 2.50228643329 0 98.887998975 1.2030015374999934 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf722 6.36224047437 0 99.50550005 0.5844999500000029 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf723 2.57685599488 0 98.899499525 1.1857507125000026 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 
add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf724 3.77195447337 0 99.58400025 0.5059997499999952 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 4 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf725 5.33920664205 0 99.4505002 0.6394998000000044 -1 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf726 2.57685599488 0 99.397999825 0.6920001749999983 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf727 4.48527898013 0 99.2640007 0.6389989500000013 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 6 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf728 5.02870270579 0 99.26750035 0.6337494749999877 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 5 -3 promise swing_level 7 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf729 6.7963162944 0 99.4540009 0.6359991000000008 -1 gpu conv perf 22 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf730 2.01610051566 0 99.660004 0.42999599999999705 -1 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf731 6.7963162944 0 99.429999925 0.6600000749999936 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf732 2.57685599488 0 99.281500125 0.6127498125000059 -1 gpu conv samp 31 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp 32 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf733 2.57685599488 0 99.0864994 0.9052509000000057 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv perf 21 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 promise swing_level 6 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf734 3.92040413524 0 99.556999775 0.5330002250000035 -1 gpu conv perf 27 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ -+++++ -conf735 3.92040413524 0 99.47550035 0.6144996499999934 -1 gpu conv perf 26 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 promise swing_level 3 -3 gpu mul fp16 1 add fp16 1 tanh fp16 1 -4 gpu mul fp16 1 add fp16 1 tanh fp16 1 -5 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges.txt deleted file mode 100644 index af4d13d6f8e6b5902ff743b07ef6875d644df91a..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges.txt +++ /dev/null @@ -1,4 +0,0 @@ -0 1 -1 1 -1 1 -1 1 --1 1 
-1 1 -1 1 -1 1 --1 1 -1 1 -1 1 -1 1 --1 1 -1 1 -1 1 -1 1 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt deleted file mode 100644 index 2a94f5c018eb44a397ea09e6f7ab3681d0c3c0f6..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/quant_ranges_rt.txt +++ /dev/null @@ -1,4 +0,0 @@ -1 0 1 -1 1 -1 1 -1 1 -2 -1 1 -1 1 -1 1 -1 1 -3 -1 1 -1 1 -1 1 -1 1 -4 -1 1 -1 1 -1 1 -1 1 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs_base.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs_base.txt deleted file mode 100644 index f2a85f352fe024f0fcf7828c259f8549f6461e24..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/data/tuner_confs_base.txt +++ /dev/null @@ -1,9 +0,0 @@ -2000 -+++++ -conf1 1 0 99.69 0 -1 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -2 gpu conv fp32 1 add fp32 1 tanh fp32 1 pool_max fp32 1 -3 gpu mul fp32 1 add fp32 1 tanh fp32 1 -4 gpu mul fp32 1 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/predictive/lenet_mnist.txt b/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/predictive/lenet_mnist.txt deleted file mode 100644 index b4e51dff426f4d3c5cb7b9572e6aa5940212acbd..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/lenet_mnist/predictive/lenet_mnist.txt +++ /dev/null @@ -1,409 +0,0 @@ -282.5141369999999 -+++++ -conf1 1 1 98.7 0.0 -1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -3 gpu mul fp32 11 add fp32 1 tanh fp32 1 -4 gpu mul fp32 11 add fp32 1 tanh fp32 1 -5 gpu softmax fp32 1 ------ -+++++ -conf2 1.828613181003043 2.071721708828981 98.65 0.04999999999999716 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf3 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf4 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf5 1.8936889628815377 2.139779619692146 98.65 0.04999999999999716 -1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf6 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf7 1.8247639611533713 2.0227145446958756 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 
pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf8 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf9 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf10 1.8406161850501603 2.037849502542524 98.64 0.060000000000002274 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf11 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf12 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf13 1.8663357888260776 2.115790921611576 98.64 0.060000000000002274 -1 gpu conv perf_fp16 155 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf14 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf15 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf16 1.8645645142051612 2.1037012333044935 98.61999999999999 0.0800000000000125 -1 gpu conv perf_fp16 167 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf17 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf18 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv 
samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf19 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 -1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf20 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf21 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf22 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf23 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf24 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf25 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf26 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf27 2.200653361151419 2.425091789360736 98.6 0.10000000000000853 -1 gpu conv samp_fp16 266 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf28 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf29 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv 
samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf30 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 -1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf31 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf32 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf33 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 -1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf34 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf35 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf36 1.8916677984300285 2.155437579874673 98.58 0.12000000000000455 -1 gpu conv perf_fp16 158 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf37 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf38 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf39 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 -1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf40 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 
pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf41 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf42 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 -1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf43 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf44 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf45 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 -1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf46 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf47 1.8698191484268973 2.13979218727595 98.54 0.1599999999999966 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf48 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf49 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf50 1.8575043605938137 2.092057786757256 98.52 0.18000000000000682 -1 gpu conv perf_fp16 165 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ -+++++ -conf51 1.8534621507951072 2.1231113105788597 98.44000000000001 0.2599999999999909 -1 gpu conv perf_fp16 159 add fp16 1 pool_max fp16 1 tanh fp16 1 -2 gpu conv samp_fp16 261 add fp16 1 pool_max 
fp16 1 tanh fp16 1 -3 gpu mul fp16 12 add fp16 1 tanh fp16 1 -4 gpu mul fp16 12 add fp16 1 tanh fp16 1 -5 gpu softmax fp32 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt deleted file mode 100644 index 4a14a5f2e45c83a2960deccbcd0296a6d9a2f2bc..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_confs_batch220.txt +++ /dev/null @@ -1,8787 +0,0 @@ -+++++ -conf1 1 0 83.5 0 -1 gpu conv fp32 1 -2 gpu batchnorm fp32 1 -3 gpu relu fp32 1 -4 gpu group_conv fp32 1 -5 gpu batchnorm fp32 1 -6 gpu relu fp32 1 -7 gpu conv fp32 1 -8 gpu batchnorm fp32 1 -9 gpu relu fp32 1 -10 gpu group_conv fp32 1 -11 gpu batchnorm fp32 1 -12 gpu relu fp32 1 -13 gpu conv fp32 1 -14 gpu batchnorm fp32 1 -15 gpu relu fp32 1 -16 gpu group_conv fp32 1 -17 gpu batchnorm fp32 1 -18 gpu relu fp32 1 -19 gpu conv fp32 1 -20 gpu batchnorm fp32 1 -21 gpu relu fp32 1 -22 gpu group_conv fp32 1 -23 gpu batchnorm fp32 1 -24 gpu relu fp32 1 -25 gpu conv fp32 1 -26 gpu batchnorm fp32 1 -27 gpu relu fp32 1 -28 gpu group_conv fp32 1 -29 gpu batchnorm fp32 1 -30 gpu relu fp32 1 -31 gpu conv fp32 1 -32 gpu batchnorm fp32 1 -33 gpu relu fp32 1 -34 gpu group_conv fp32 1 -35 gpu batchnorm fp32 1 -36 gpu relu fp32 1 -37 gpu conv fp32 1 -38 gpu batchnorm fp32 1 -39 gpu relu fp32 1 -40 gpu group_conv fp32 1 -41 gpu batchnorm fp32 1 -42 gpu relu fp32 1 -43 gpu conv fp32 1 -44 gpu batchnorm fp32 1 -45 gpu relu fp32 1 -46 gpu group_conv fp32 1 -47 gpu batchnorm fp32 1 -48 gpu relu fp32 1 -49 gpu conv fp32 1 -50 gpu batchnorm fp32 1 -51 gpu relu fp32 1 -52 gpu group_conv fp32 1 -53 gpu batchnorm fp32 1 -54 gpu relu fp32 1 -55 gpu conv fp32 1 -56 gpu batchnorm fp32 1 -57 gpu relu fp32 1 -58 gpu group_conv fp32 1 -59 gpu batchnorm fp32 1 -60 gpu relu fp32 1 -61 gpu conv fp32 1 -62 gpu batchnorm fp32 1 -63 gpu relu fp32 1 -64 gpu group_conv fp32 1 -65 gpu batchnorm fp32 1 -66 gpu relu fp32 1 -67 gpu conv fp32 1 -68 gpu batchnorm fp32 1 -69 gpu relu fp32 1 -70 gpu group_conv fp32 1 -71 gpu batchnorm fp32 1 -72 gpu relu fp32 1 -73 gpu conv fp32 1 -74 gpu batchnorm fp32 1 -75 gpu relu fp32 1 -76 gpu group_conv fp32 1 -77 gpu batchnorm fp32 1 -78 gpu relu fp32 1 -79 gpu conv fp32 1 -80 gpu batchnorm fp32 1 -81 gpu relu fp32 1 -82 gpu pool_mean fp32 1 -83 gpu mul fp32 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf1 1.66592032533 0 82.900002 0.899996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 
-45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf2 1.66766290747 0 82.860001 0.9599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf3 1.66670139642 0 82.900002 0.899996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu 
fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf4 1.66748320027 0 82.940002 0.8399969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf5 1.66826573791 0 82.940002 0.8399969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 
gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf6 1.55813665736 0 82.900002 0.899996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv 
perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf7 1.62488529847 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf8 1.66748320027 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf9 1.62414291979 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf10 1.63389893891 0 83.12001 0.5699850000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 
1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf11 1.63332068534 0 83.119995 0.5700074999999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf12 1.66826573791 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu 
fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf13 1.66345526231 0 83.019997 0.7200044999999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf14 1.62414291979 0 82.860001 0.9599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 
gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf15 1.66867727615 0 83.219994 0.6800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu 
conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf16 1.6939272698 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf17 1.69125907336 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 
-62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf18 1.66267723003 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf19 1.69414545349 0 82.259995 1.8600074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu 
batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf20 1.66846560269 0 83.039993 0.6900105000000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 33 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf21 1.69333845447 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm 
fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf22 1.68774460395 0 82.239998 1.890003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf23 1.69063951413 0 82.899994 0.9000089999999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu 
fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf24 1.66867727615 0 83.659996 0.24000399999999333 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf25 1.69454924151 0 82.519997 1.4700044999999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf26 1.68718051806 0 83.020004 0.7199939999999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv 
perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf27 1.69495322205 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf28 1.66267723003 0 83.100006 0.5999910000000099 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf29 1.66867727615 0 83.119995 0.5700074999999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf30 1.69374185785 0 82.239998 1.890003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 
-27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf31 1.64669056053 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf32 1.69166148649 0 82.800003 1.0499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu 
fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf33 1.66867727615 0 83.219994 0.6800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf34 1.6939272698 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf35 1.69290255882 0 82.259995 1.8600074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv 
perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf36 1.64980944683 0 82.32 1.7700000000000102 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf37 1.66750316607 0 83.379997 0.520002999999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu 
batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf38 1.58532838785 0 82.259995 1.8600074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf39 1.6384977922 0 83.060005 0.6599924999999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm 
fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf40 1.69312047859 0 82.219994 1.9200090000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf41 1.66867727615 0 82.720001 1.1699985000000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 
gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf42 1.58568196474 0 82.280006 1.8299909999999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu 
relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf43 1.67188008661 0 83.060005 0.6599924999999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf44 1.68426507974 0 82.300003 1.7999954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 
-61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 36 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf45 1.67470064441 0 81.800003 2.5499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf46 1.5655260791 0 82.060005 2.159992499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv 
fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 29 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf47 1.72373997515 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf48 1.71832409894 0 82.400002 1.649996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 
gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf49 1.70069521045 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf50 1.67391205968 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm 
fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf51 1.72248712959 0 81.920006 2.369990999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 
-78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf52 1.66577380924 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv fp16 1 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf53 1.72415799543 0 81.539993 2.9400105000000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 
1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf54 1.69663672592 0 82.559998 1.4100030000000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf55 1.7199868941 0 82.320007 1.769989499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv 
fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf56 1.67391205968 0 81.800003 2.5499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf57 1.72415799543 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 
gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf58 1.67369905614 0 82.440002 1.5899969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf59 1.69988196097 0 82.459999 1.5600015000000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu 
batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf60 1.72332215751 0 82.540001 1.4399984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf61 1.56656092908 0 82.440002 1.5899969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf62 1.66577380924 0 82.819992 1.0200120000000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 
1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf63 1.72415799543 0 81.599998 2.850003000000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf64 1.61121417569 0 81.899994 2.40000899999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu 
fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf65 1.72081949906 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf66 1.64082797814 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf67 1.67470064441 0 81.500008 2.999988000000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf68 1.69663672592 0 82.040001 2.1899984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv 
fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf69 1.72415799543 0 81.860001 2.4599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 
-77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf70 1.67470064441 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf71 1.67470064441 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu 
batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf72 1.72309639446 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf73 1.67786242745 0 82.180008 1.9799879999999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm 
fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf74 1.72332215751 0 82.759995 1.1100074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf75 1.7199868941 0 82.099998 2.100003000000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 
gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf76 1.56621582711 0 82.419998 1.6200029999999899 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf77 1.72332215751 0 82.579994 1.3800090000000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu 
fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf78 1.7216529105 0 82.400002 1.649996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf79 1.72332215751 0 82.540001 1.4399984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf80 1.72373997515 0 81.580002 2.87999700000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 
1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf81 1.72373997515 0 82.0 2.25 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf82 1.70069521045 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu 
relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf83 1.72415799543 0 82.020004 2.219994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf84 1.72415799543 0 81.860001 2.4599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu 
conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf85 1.72309639446 0 82.199997 1.9500045000000057 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 33 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf86 1.56621582711 0 81.659996 2.76000599999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 
1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf87 1.66865728222 0 82.080002 2.12999700000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf88 1.67391205968 0 81.659996 2.76000599999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf89 1.69641790012 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 
1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf90 1.67470064441 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf91 1.70069521045 0 82.139999 2.0400014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu 
relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf92 1.67430625919 0 81.680008 2.7299879999999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf93 1.72267888872 0 81.580002 2.87999700000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 
-25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf94 1.56794285911 0 81.68 2.7299999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv fp16 1 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf95 1.72081949906 0 82.159996 2.01000599999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv 
fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf96 1.70069521045 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 
gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf97 1.66167014075 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf98 1.72248712959 0 82.120003 2.0699955000000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm 
fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf99 1.72415799543 0 81.800003 2.5499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf100 1.72415799543 0 82.020004 2.219994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu 
fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt deleted file mode 100644 index 86b061f3d9ff5b75a9580ae65afd9ff6c20f9701..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_pareto_confs_batch220.txt +++ /dev/null @@ -1,7482 +0,0 @@ -+++++ -conf1 1 0 83.5 0 -1 gpu conv fp32 1 -2 gpu batchnorm fp32 1 -3 gpu relu fp32 1 -4 gpu group_conv fp32 1 -5 gpu batchnorm fp32 1 -6 gpu relu fp32 1 -7 gpu conv fp32 1 -8 gpu batchnorm fp32 1 -9 gpu relu fp32 1 -10 gpu group_conv fp32 1 -11 gpu batchnorm fp32 1 -12 gpu relu fp32 1 -13 gpu conv fp32 1 -14 gpu batchnorm fp32 1 -15 gpu relu fp32 1 -16 gpu group_conv fp32 1 -17 gpu batchnorm fp32 1 -18 gpu relu fp32 1 -19 gpu conv fp32 1 -20 gpu batchnorm fp32 1 -21 gpu relu fp32 1 -22 gpu group_conv fp32 1 -23 gpu batchnorm fp32 1 -24 gpu relu fp32 1 -25 gpu conv fp32 1 -26 gpu batchnorm fp32 1 -27 gpu relu fp32 1 -28 gpu group_conv fp32 1 -29 gpu batchnorm fp32 1 -30 gpu relu fp32 1 -31 gpu conv fp32 1 -32 gpu batchnorm fp32 1 -33 gpu relu fp32 1 -34 gpu group_conv fp32 1 -35 gpu batchnorm fp32 1 -36 gpu relu fp32 1 -37 gpu conv fp32 1 -38 gpu batchnorm fp32 1 -39 gpu relu fp32 1 -40 gpu group_conv fp32 1 -41 gpu batchnorm fp32 1 -42 gpu relu fp32 1 -43 gpu conv fp32 1 -44 gpu batchnorm fp32 1 -45 gpu relu fp32 1 -46 gpu group_conv fp32 1 -47 gpu batchnorm fp32 1 -48 gpu relu fp32 1 -49 gpu conv fp32 1 -50 gpu batchnorm fp32 1 -51 gpu relu fp32 1 -52 gpu group_conv fp32 1 -53 gpu batchnorm fp32 1 -54 gpu relu fp32 1 -55 gpu conv fp32 1 -56 gpu batchnorm fp32 1 -57 gpu relu fp32 1 -58 gpu group_conv fp32 1 -59 gpu batchnorm fp32 1 -60 gpu relu fp32 1 -61 gpu conv fp32 1 -62 gpu batchnorm fp32 1 -63 gpu relu fp32 1 -64 gpu group_conv fp32 1 -65 gpu batchnorm fp32 1 -66 gpu relu fp32 1 -67 gpu conv fp32 1 -68 gpu batchnorm fp32 1 -69 gpu relu fp32 1 -70 gpu group_conv fp32 1 -71 gpu batchnorm fp32 1 -72 gpu relu fp32 1 -73 gpu conv fp32 1 -74 gpu batchnorm fp32 1 -75 gpu relu fp32 1 -76 gpu group_conv fp32 1 -77 gpu batchnorm fp32 1 -78 gpu relu fp32 1 -79 gpu conv fp32 1 -80 gpu batchnorm fp32 1 -81 gpu relu fp32 1 -82 gpu pool_mean fp32 1 -83 gpu mul fp32 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf1 1.66592032533 0 82.900002 0.899996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu 
relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf2 1.66766290747 0 82.860001 0.9599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf3 1.66670139642 0 82.900002 0.899996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf4 1.66748320027 0 82.940002 0.8399969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv 
fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf5 1.66826573791 0 82.940002 0.8399969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf6 1.62488529847 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf7 1.66748320027 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf8 1.62414291979 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm 
fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf9 1.63389893891 0 83.12001 0.5699850000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf10 1.63332068534 0 83.119995 0.5700074999999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 
gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf11 1.66826573791 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 
-76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf12 1.66345526231 0 83.019997 0.7200044999999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf13 1.62414291979 0 82.860001 0.9599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu 
group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf14 1.66867727615 0 83.219994 0.6800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf15 1.6939272698 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv 
fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf16 1.69125907336 0 82.979996 0.7800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf17 1.66267723003 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 
gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf18 1.69414545349 0 82.259995 1.8600074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf19 1.66846560269 0 83.039993 0.6900105000000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 33 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf20 1.69333845447 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 
-75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf21 1.68774460395 0 82.239998 1.890003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf22 1.69063951413 0 82.899994 0.9000089999999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 
gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf23 1.66867727615 0 83.659996 0.24000399999999333 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf24 1.69454924151 0 82.519997 1.4700044999999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf25 1.68718051806 0 83.020004 0.7199939999999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf26 1.69495322205 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv 
fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf27 1.66267723003 0 83.100006 0.5999910000000099 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf28 1.66867727615 0 83.119995 0.5700074999999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 
-5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf29 1.69374185785 0 82.239998 1.890003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 
-75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf30 1.64669056053 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf31 1.69166148649 0 82.800003 1.0499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu 
fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf32 1.66867727615 0 83.219994 0.6800060000000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf33 1.6939272698 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf34 1.69290255882 0 82.259995 1.8600074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf35 1.64980944683 0 82.32 1.7700000000000102 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 
1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf36 1.66750316607 0 83.379997 0.520002999999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf37 1.6384977922 0 83.060005 0.6599924999999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf38 1.69312047859 0 82.219994 1.9200090000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 
-75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf39 1.66867727615 0 82.720001 1.1699985000000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf40 1.67188008661 0 83.060005 0.6599924999999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu 
fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf41 1.68426507974 0 82.300003 1.7999954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 36 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf42 1.67470064441 0 81.800003 2.5499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf43 1.72373997515 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf44 1.71832409894 0 82.400002 1.649996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv 
fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf45 1.70069521045 0 83.080002 0.6299970000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf46 1.72248712959 0 81.920006 2.369990999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 
-5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf47 1.72415799543 0 81.539993 2.9400105000000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf48 1.69663672592 0 82.559998 1.4100030000000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf49 1.7199868941 0 82.320007 1.769989499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 
1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf50 1.72415799543 0 82.240005 1.8899925000000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf51 1.67369905614 0 82.440002 1.5899969999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu 
relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf52 1.69988196097 0 82.459999 1.5600015000000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf53 1.72332215751 0 82.540001 1.4399984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 
-22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf54 1.66577380924 0 82.819992 1.0200120000000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf55 1.72415799543 0 81.599998 2.850003000000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf56 1.72081949906 0 82.360001 1.7099985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 
21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf57 1.67470064441 0 81.500008 2.999988000000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf58 1.69663672592 0 82.040001 2.1899984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf59 1.72415799543 0 81.860001 2.4599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf60 1.67470064441 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf61 1.67470064441 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf62 1.72309639446 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 
gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf63 1.67786242745 0 82.180008 1.9799879999999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf64 1.72332215751 0 82.759995 1.1100074999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu 
relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf65 1.7199868941 0 82.099998 2.100003000000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 
-73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf66 1.72332215751 0 82.579994 1.3800090000000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf67 1.7216529105 0 82.400002 1.649996999999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv 
perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf68 1.72332215751 0 82.540001 1.4399984999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf69 1.72373997515 0 81.580002 2.87999700000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 
gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf70 1.72373997515 0 82.0 2.25 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf71 1.70069521045 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu 
fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf72 1.72415799543 0 82.020004 2.219994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf73 1.72415799543 0 81.860001 2.4599985000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf74 1.72309639446 0 82.199997 1.9500045000000057 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 
21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 33 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf75 1.69641790012 0 82.639999 1.2900014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf76 1.67470064441 0 82.379997 1.6800044999999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf77 1.70069521045 0 82.139999 2.0400014999999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf78 1.67430625919 0 81.680008 2.7299879999999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf79 1.72267888872 0 81.580002 2.87999700000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf80 1.72081949906 0 82.159996 2.01000599999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu 
relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf81 1.70069521045 0 82.18 1.9799999999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf82 1.66167014075 0 82.880005 0.9299925000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf83 1.72248712959 0 82.120003 2.0699955000000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 
gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf84 1.72415799543 0 81.800003 2.5499954999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf85 1.72415799543 0 82.020004 2.219994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt deleted file mode 100644 index 3b628d570fcb1884cfa10371a2aaf6856a652d1e..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi.txt +++ /dev/null @@ -1,80039 +0,0 @@ -+++++ -conf1 1 0 83.5 0 -1 gpu conv fp32 1 -2 gpu batchnorm fp32 1 -3 gpu relu fp32 1 -4 gpu group_conv fp32 1 -5 gpu batchnorm fp32 1 -6 gpu relu fp32 1 -7 gpu conv fp32 1 -8 gpu batchnorm fp32 1 -9 gpu relu fp32 1 -10 gpu group_conv fp32 1 -11 gpu batchnorm fp32 1 -12 gpu relu fp32 1 -13 gpu conv fp32 1 -14 gpu batchnorm fp32 1 -15 gpu relu fp32 1 -16 gpu group_conv fp32 1 -17 gpu batchnorm fp32 1 -18 gpu relu fp32 1 -19 gpu conv fp32 1 -20 gpu batchnorm fp32 1 -21 gpu relu fp32 1 -22 gpu group_conv fp32 1 -23 gpu batchnorm fp32 1 -24 gpu relu fp32 1 -25 gpu conv fp32 1 -26 gpu batchnorm fp32 1 -27 gpu relu fp32 1 -28 gpu group_conv fp32 1 -29 gpu batchnorm fp32 1 -30 gpu relu fp32 1 -31 gpu conv fp32 1 -32 gpu batchnorm fp32 1 -33 gpu relu fp32 1 -34 gpu group_conv fp32 1 -35 gpu batchnorm fp32 1 -36 gpu relu fp32 1 -37 gpu conv fp32 1 -38 gpu batchnorm fp32 1 -39 gpu relu fp32 1 -40 gpu group_conv fp32 1 -41 gpu batchnorm fp32 1 -42 gpu relu fp32 1 -43 gpu conv fp32 1 -44 gpu batchnorm fp32 1 -45 gpu relu fp32 1 -46 gpu group_conv fp32 1 -47 gpu batchnorm fp32 1 -48 gpu relu fp32 1 -49 gpu conv fp32 1 -50 gpu batchnorm fp32 1 -51 gpu relu fp32 1 -52 gpu group_conv fp32 1 -53 gpu batchnorm fp32 1 -54 gpu relu fp32 1 -55 gpu conv fp32 1 -56 gpu batchnorm fp32 1 -57 gpu relu fp32 1 -58 gpu group_conv fp32 1 -59 gpu batchnorm fp32 1 -60 gpu relu fp32 1 -61 gpu conv fp32 1 -62 gpu batchnorm fp32 1 -63 gpu relu fp32 1 -64 gpu group_conv fp32 1 -65 gpu batchnorm fp32 1 -66 gpu relu fp32 1 -67 gpu conv fp32 1 -68 gpu batchnorm fp32 1 -69 gpu relu fp32 1 -70 gpu group_conv fp32 1 -71 gpu batchnorm fp32 1 -72 gpu relu fp32 1 -73 gpu conv fp32 1 -74 gpu batchnorm fp32 1 -75 gpu relu fp32 1 -76 gpu group_conv fp32 1 -77 gpu batchnorm fp32 1 -78 gpu relu fp32 1 -79 gpu conv fp32 1 -80 gpu batchnorm fp32 1 -81 gpu relu fp32 1 -82 gpu pool_mean fp32 1 -83 gpu mul fp32 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf2 2.98991537361 0 83.386665875 0.5133341249999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 
-19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf3 3.88159289347 0 83.2783331 0.6216669000000025 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf4 
4.14749473048 0 83.220000325 0.6799996749999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf5 4.4175200707 0 83.219999875 0.6800001250000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu 
fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf6 4.43502212401 0 83.155834675 0.5162479875000088 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf7 4.10832403497 0 83.103333275 0.5950000875000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 
gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf8 3.31453105661 0 82.59083295 1.3637505749999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf9 3.52220799908 0 82.4716658 1.542501300000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 
gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf10 3.44814122333 0 82.51916615 1.4712507749999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf11 3.02800023045 0 82.64999965 1.275000525000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 
1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf12 3.68207420915 0 82.448332775 1.5775008375000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 
gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf13 3.862916011 0 82.7708336 1.0937495999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf14 3.7573272945 0 82.422500075 1.6162498874999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf15 3.93066025121 0 82.594167 1.3587495000000018 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf16 3.87800695966 0 82.430000125 1.6049998124999902 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 
-37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf17 3.68207420915 0 82.56333275 1.405000874999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf18 3.22097285505 0 83.564167475 0.33583252499999505 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf19 3.82500219093 0 82.9275003 0.8587495499999918 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu 
softmax fp16 1 ------ -+++++ -conf20 3.90066717558 0 82.329165975 1.7562510375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf21 3.86501599073 0 83.115000925 0.5774986124999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 
-65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf22 3.40096875474 0 82.72416595 1.1637510750000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf23 3.5538161637 0 82.9700005 0.7949992500000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 
-46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf24 3.17344943111 0 83.00083265 0.74875102499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf25 2.76788477576 0 82.447499725 1.5787504124999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm 
fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf26 3.92397614204 0 83.174999825 0.48750026249999934 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf27 3.4092100078 0 83.1424999 0.5362501499999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu 
conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf28 3.03961006636 0 82.704167175 1.1937492375000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 
-75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf29 3.62973730797 0 83.085833325 0.6212500125000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf30 2.81140054286 0 82.4325003 1.6012495499999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise 
swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf31 3.18575701105 0 82.52833345 1.4574998249999922 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf32 3.42595978009 0 82.7708333 1.09375004999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 
-36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf33 3.17255385439 0 82.7233329 1.165000649999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf34 3.6391339197 0 82.831667325 1.002499012500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 
-17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf35 2.72368244288 0 83.034168075 0.698747887500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 
1 ------ -+++++ -conf36 3.44714364594 0 82.539999575 1.440000637500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf37 3.85171694927 0 83.137500575 0.5437491374999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm 
fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf38 3.02151032351 0 83.1958331 0.4562503499999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf39 3.92280583455 0 83.017499875 0.7237501875000021 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 
-47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf40 4.15840004076 0 82.141666525 2.0375002124999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf41 2.89589235375 0 82.634166725 1.2987499125 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu 
relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf42 6.16453028593 0 81.9308327 2.3537509500000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf43 4.7463107647 0 81.9616666 2.3075000999999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv 
fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf44 5.84575268801 0 81.983333575 2.2749996374999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 
4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf45 3.45773167067 0 82.297500375 1.8037494375000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf46 4.34036485844 0 82.463333875 1.5549991874999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu 
fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf47 4.75207062649 0 81.74500045 2.6324993249999906 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf48 6.09000225926 0 81.9883331 2.2675003499999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv 
fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf49 5.25532208128 0 81.92083345 2.3687498249999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf50 4.35262313423 0 82.356666825 1.7149997624999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm 
fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf51 5.25144034242 0 81.9350005 2.3474992499999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 
gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf52 5.32967222406 0 81.9616671 2.3074993500000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf53 4.13210954206 0 83.056666875 0.6649996874999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 
promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf54 3.93967771859 0 81.763332525 2.6050012125000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf55 4.13872474867 0 82.631666775 1.302499837500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 
gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf56 2.7690518229 0 81.93666605 2.345000925000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf57 5.60283597265 0 81.9233338 2.364999300000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu 
fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf58 4.45486021161 0 82.0608328 2.1587508 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf59 4.22738367053 0 82.226667075 1.909999387500008 -1 gpu 
conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf60 6.10852785257 0 81.959167325 2.3112490125000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu 
batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf61 4.98692149992 0 81.822500025 2.516249962500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf62 6.11662384336 0 81.9808335 2.278749749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu 
fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf63 5.97727027928 0 82.0224998 2.2162502999999916 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf64 2.93382347771 0 81.85416565 2.468751525000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu 
group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf65 5.54950778131 0 81.73833355 2.6424996750000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf66 6.11662384336 0 81.889999075 2.4150013874999914 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 
1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf67 3.82767121119 0 82.26583335 1.8512499750000089 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu 
fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf68 4.99324893801 0 81.816667 2.524999500000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf69 5.74180480491 0 81.889166525 2.4162502125000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 
25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf70 2.8416794212 0 82.981666775 0.7774998375000095 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf71 5.63492586502 0 81.993333575 2.259999637500009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm 
fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf72 3.83421974764 0 83.207499825 0.6925001750000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf73 1.87482212142 0 82.811667125 1.0324993125000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu 
group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf74 4.43826460769 0 81.75166725 2.6224991250000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf75 5.03123041946 0 82.306666375 1.790000437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf76 4.2701321542 0 82.1975002 1.9537497000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu 
relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf77 3.8174916299 0 83.4608337 0.4391663000000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf78 3.54707867194 0 83.409999875 0.4900001250000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu 
conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf79 3.90991089555 0 83.46999955 0.43000045000000287 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf80 3.67974499409 0 83.3833332 0.5166668000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu 
batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf81 2.82550849059 0 83.28583315 0.614166849999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf82 2.77356236628 0 83.60833335 0.2916666500000048 -1 gpu conv fp16 1 -2 gpu 
batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf83 2.54319644535 0 83.399166475 0.5008335249999988 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu 
relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf84 3.73175797849 0 83.50083335 0.3991666500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf85 3.36226524472 0 83.472500175 0.4274998250000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 
gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf86 3.00745254477 0 83.33583355 0.5641664499999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf87 3.95482984539 0 83.289165675 0.6108343249999933 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf88 2.89889994154 0 83.269167175 0.6308328249999932 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf89 3.60299625636 0 83.3791664 0.5208335999999975 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu 
fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf90 3.13621575975 0 83.54083385 0.3591661500000015 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf91 2.61388509814 0 83.457500225 0.44249977499999604 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf92 4.05930607617 0 83.1716666 0.492500100000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf93 3.73175797849 0 83.575000225 0.32499977500000343 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf94 3.76274140853 0 83.47916685 0.42083315000000143 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 
-36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf95 3.20332969056 0 83.850833275 0.04916672500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf96 3.64570944225 0 83.45249935 0.4475006500000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu 
group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf97 3.85035669633 0 83.5608333 0.33916670000000126 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 
1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf98 3.54829526922 0 83.25249975 0.6475002500000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf99 3.61899339422 0 83.278334075 0.6216659249999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 
-63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf100 3.28254525212 0 83.489167025 0.4108329749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf101 3.50816224551 0 83.252499725 0.6475002749999931 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 
-43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf102 4.10549146346 0 83.3416668 0.5583332000000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf103 3.36715352889 0 83.584166725 0.31583327499999714 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 
gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf104 3.1088246435 0 83.1591665 0.5112502500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf105 3.47488338292 0 83.388333125 0.5116668749999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 
1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf106 3.59538418566 0 83.5300007 0.3699992999999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu 
fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf107 3.97286473272 0 83.537499975 0.3625000249999971 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf108 3.8174916299 0 83.451667075 0.4483329249999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv 
perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf109 3.46345463754 0 83.462500825 0.4374991749999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf110 3.556746151 0 83.424999425 0.47500057500000425 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf111 3.96049527585 0 83.53333295 0.3666670499999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf112 3.9715961288 0 83.32833325 0.57166675 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf113 3.93130152041 0 82.56666695 1.3999995750000025 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf114 2.8668123125 0 82.998333125 0.7525003124999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf115 2.64845545339 0 82.965000875 0.8024986874999982 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu 
relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf116 3.14597582271 0 82.849167475 0.9762487874999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf117 2.72482912735 0 83.4741666 0.42583339999999625 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf118 3.95103617451 0 82.3491667 1.7262499500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf119 3.00588110745 0 83.02166615 0.7175007749999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 
-18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf120 3.28000196808 0 82.762500375 1.1062494374999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ 
-+++++ -conf121 3.47173739276 0 82.802499825 1.0462502625000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf122 2.61621549789 0 83.197500425 0.4537493624999982 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu 
relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf123 2.95549421538 0 83.010000475 0.7349992875000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf124 3.88669230643 0 82.45333265 1.5700010250000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 
gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf125 2.83364863532 0 82.581667225 1.3774991624999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf126 3.08002863884 0 82.504999925 1.4925001124999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 
-28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf127 2.00048974491 0 82.50666635 1.490000474999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf128 2.73765608296 0 82.255832625 1.8662510625000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 
gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf129 4.06487927094 0 83.13333375 0.5499993749999916 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf130 4.09684344986 0 83.003333475 0.7449997874999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf131 3.21849911232 0 82.29583385 1.806249225000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf132 2.04860322208 0 82.465833275 1.5512500875000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf133 3.80144895722 0 82.52833315 1.4575002750000081 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 
-38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf134 2.56619518427 0 82.785834275 1.0712485875000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf135 3.62695395201 0 82.454165975 1.5687510375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 
gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf136 3.89727045934 0 82.97500015 0.7874997750000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ 
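[Editor's note, illustrative only.] The deleted blocks above and below all share one text format: a `+++++` marker, a header line that — judging from the baseline entry in these files (speedup 1, loss 0) — appears to hold a configuration name, estimated speedup, an energy field (always 0 here), measured accuracy, and accuracy loss versus baseline, then one knob line per layer (`<layer-id> <target> <op/knob settings...>`), closed by `-----`. The sketch below is a hypothetical parser for that observed layout; the names `parse_tuner_confs`, `Configuration`, and `LayerKnob`, and the field interpretations, are assumptions for illustration, not HPVM's actual loader.

    # Illustrative sketch: parses the tuner_confs text format as it appears
    # in this diff. Header-field meanings are inferred from the baseline
    # configuration, not taken from HPVM sources.
    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class LayerKnob:
        layer_id: int        # 1-based layer index within the network
        target: str          # "gpu" or "promise" in these files
        settings: List[str]  # e.g. ["conv", "fp16", "1"] or ["swing_level", "6"]

    @dataclass
    class Configuration:
        name: str            # e.g. "conf113"
        speedup: float       # estimated speedup over the fp32 baseline (inferred)
        energy: float        # energy field; always 0 in these files
        accuracy: float      # measured accuracy (%)
        accuracy_loss: float # drop relative to baseline accuracy (inferred)
        layers: List[LayerKnob] = field(default_factory=list)

    def parse_tuner_confs(text: str) -> List[Configuration]:
        confs: List[Configuration] = []
        cur = None
        for line in text.splitlines():
            line = line.strip()
            if line in ("+++++", "-----"):
                cur = None               # block delimiter: header follows / block done
            elif cur is None and line:
                name, speedup, energy, acc, loss = line.split()
                cur = Configuration(name, float(speedup), float(energy),
                                    float(acc), float(loss))
                confs.append(cur)
            elif cur is not None and line:
                layer_id, target, *settings = line.split()
                cur.layers.append(LayerKnob(int(layer_id), target, settings))
        return confs

Given such parsed blocks, one could, for example, select the fastest configuration under a 1% accuracy-loss budget with `max((c for c in confs if c.accuracy_loss < 1.0), key=lambda c: c.speedup)`.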
-+++++ -conf137 4.12276718448 0 82.457499725 1.5637504124999921 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf138 3.60700810131 0 82.545832825 1.4312507624999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 
1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf139 4.0205364833 0 81.89083385 2.4137492249999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf140 4.11840212461 0 81.8091669 2.536249650000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 
gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf141 3.01049479281 0 82.019999875 2.2200001875000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf142 3.85335942385 0 82.0483325 2.177501249999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 
gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf143 2.5026299742 0 81.625833925 2.8112491125000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf144 3.43886954105 0 82.726666675 1.1599999874999938 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu 
relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf145 3.73017005141 0 82.89583355 0.9062496749999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise 
swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf146 3.15732515345 0 81.805000075 2.542499887500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf147 3.98632041312 0 82.07166655 2.142500174999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm 
fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf148 3.80024443647 0 82.9274992 0.8587512000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf149 4.08467265051 0 82.004166025 2.2437509624999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu 
fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf150 4.02990448369 0 82.30916665 1.7862500250000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf151 3.90355228103 0 81.8025006 2.546249099999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu 
conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf152 3.94843898601 0 81.8916664 2.412500399999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise 
swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf153 4.12751348406 0 81.888333875 2.417499187499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf154 4.02515780566 0 81.9924993 2.2612510499999914 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu 
batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf155 3.83482666749 0 82.1449991 2.0325013499999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf156 3.8517164764 0 82.2041664 1.9437503999999919 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu 
relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf157 4.10598132256 0 81.954167525 2.318748712499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf158 3.31478397356 0 81.75083395 2.623749074999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv 
fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf159 3.96623260541 0 82.226666875 1.9099996874999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf160 
4.04782353886 0 82.049166175 2.1762507375000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf161 4.02858926028 0 81.793333275 2.5600000875000077 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 
gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf162 3.00292158132 0 81.6974989 2.703751650000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf163 3.29748236022 0 82.1050001 2.092499850000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 
gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf164 4.0362576555 0 82.2466675 1.8799987499999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf165 3.05446537337 0 82.1716669 1.992499649999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu 
relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf166 3.9071658065 0 82.190832875 1.9637506875000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf167 3.93287113327 0 82.12666665 2.0600000249999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 
gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf168 4.04478954767 0 81.856666625 2.465000062499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 
promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf169 3.61558117477 0 81.9016671 2.397499350000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf170 4.30825679247 0 81.851666825 2.472499762500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu 
batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf171 3.69363094091 0 82.946667475 0.8299987875000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf172 2.77993236963 0 82.4766672 1.5349992000000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 
gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf173 2.54145510026 0 82.7925007 1.0612489499999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf174 3.87860542119 0 82.008333625 2.237499562499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu 
conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf175 3.56868994119 0 82.02499975 2.2125003749999905 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu 
conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf176 2.23770347257 0 83.0258332 0.7112502000000092 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf177 2.49861746763 0 82.984166525 0.7737502124999907 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 
-60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf178 2.02993947881 0 83.30250015 0.5974998499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf179 2.20017225716 0 83.1766651 0.48500235000000913 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 
-40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf180 2.71551751125 0 83.228333425 0.6716665749999976 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf181 2.06184549766 0 83.031666575 0.702500137499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 
-21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf182 2.77617639439 0 82.328333125 1.7575003124999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf183 3.8694276968 0 82.7800006 
1.079999100000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf184 2.34590457627 0 82.2816663 1.8275005500000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 
gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf185 4.07407440381 0 82.244165975 1.883751037499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf186 3.11780856309 0 82.428332975 1.6075005374999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu 
fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf187 3.58558835651 0 82.3399998 1.7400002999999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf188 3.85234242953 0 82.34583265 1.7312510249999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 
gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf189 2.7074193437 0 82.325833575 1.7612496374999935 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf190 4.05895393605 0 82.3066669 1.7899996500000057 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf191 3.78103563563 0 82.3833346 1.6749981000000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf192 3.7929361233 0 82.3158331 1.7762503499999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf193 2.97917012062 0 82.399166375 1.6512504375000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm 
fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf194 3.70896846547 0 82.84583335 0.9812499749999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf195 3.05031465583 0 82.129166025 2.0562509624999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 
-37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf196 4.76953621711 0 81.905 2.3924999999999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf197 4.82068705485 0 82.02250055 2.2162491749999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf198 3.40685460008 0 82.039999825 2.190000262500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf199 3.7406185613 0 82.309166475 1.7862502875000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf200 3.09685498241 0 82.1758326 1.9862510999999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 
1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf201 3.23081977958 0 82.9983336 0.7524996000000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf202 3.96567454672 0 82.4983326 1.5025011000000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf203 4.59326226068 0 82.658333425 1.2624998625000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf204 4.03800709024 0 82.6316668 1.3024997999999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm 
fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf205 3.42928358185 0 82.031665675 2.202501487500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf206 3.32221289747 0 82.153333675 2.0199994875000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu 
relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf207 5.66794988438 0 81.89250005 2.411249925000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 
-70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf208 2.71874001219 0 82.497499925 1.5037501125000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf209 3.03474416486 0 82.05833335 2.162499975000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm 
fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf210 3.88176217612 0 82.449999725 1.5750004125000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf211 3.54762785706 0 83.89833405 0.0016659499999974736 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf212 3.05039482856 0 82.7850004 1.072499399999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf213 2.82821126308 0 82.680833325 1.228750012500008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu 
batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf214 3.81477730635 0 82.135832925 2.046250612499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf215 3.39722289075 0 83.580000125 0.31999987500000204 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf216 3.89111320826 0 82.1216669 2.067499650000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 
gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf217 3.50253383593 0 82.2108337 1.9337494500000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf218 4.05813557147 0 82.50000075 1.4999988750000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 
-37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf219 3.26770542063 0 83.100000825 0.5999987625000074 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf220 3.18828477511 0 82.7100005 1.18499924999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 
gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf221 2.71225509774 0 82.55166665 1.422500024999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu 
softmax fp16 1 ------ -+++++ -conf222 3.87817450174 0 82.5324995 1.4512507499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf223 4.03339162129 0 82.4558336 1.566249599999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf224 3.91264706835 0 82.13083465 2.0537480250000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf225 3.06837262281 0 82.53416635 1.44875047499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf226 3.69764932636 0 81.918333375 2.3724999374999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf227 2.43486575166 0 82.865833525 0.951249712500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu 
relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf228 2.32359233423 0 82.09833355 2.10249967499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf229 3.42928358185 0 82.219165975 1.9212510375000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf230 3.55619894808 0 81.826667025 2.509999462499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 
gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf231 2.86694058251 0 82.870833375 0.9437499374999945 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf232 4.88375274604 0 81.896667125 2.40499931250001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 
-51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf233 3.28938021997 0 82.0241669 2.213749650000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf234 4.39309472823 0 81.828332925 2.507500612500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf235 3.79357213589 0 83.575833175 0.3241668250000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf236 2.12486885207 0 83.46416625 0.4358337499999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 
-11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf237 3.95836341588 0 83.341666975 0.5583330250000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm 
fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf238 2.25904057642 0 83.567499 0.33250100000000204 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf239 3.39599428853 0 83.3233327 0.5766673000000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu 
group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf240 2.27086134134 0 83.0875002 0.6187497000000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf241 2.25812915866 0 83.616666775 0.28333322500000124 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 
-39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf242 2.12981011418 0 83.4783326 0.42166739999999836 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf243 2.57878675932 0 83.4525004 0.4474995999999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 
gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf244 2.61999068304 0 83.43166675 0.4683332499999949 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf245 2.62385834639 0 
83.3758336 0.5241663999999929 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf246 3.78695562862 0 83.406667325 0.4933326749999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 
promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf247 3.25056012417 0 83.410000075 0.4899999250000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf248 2.64116522688 0 83.459999775 0.440000225 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 
-47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf249 3.87266738703 0 83.2074999 0.6925000999999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf250 3.1762968602 0 83.257500125 0.642499874999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 
gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf251 2.60635725011 0 83.481666275 0.4183337250000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf252 2.25521755755 0 83.1966667 0.45499995000000837 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 
gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf253 3.2990666889 0 83.4449997 0.45500030000000324 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf254 2.87427851974 0 83.0825007 0.6262489500000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf255 2.55397266535 0 83.6124996 0.28750039999999333 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 
gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf256 3.90339685542 0 83.2258328 0.6741671999999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf257 3.3158753237 0 83.366665625 0.533334375000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 
-35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf258 2.71891142175 0 83.264168025 0.6358319749999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf259 2.07975389368 0 83.454165825 0.445834174999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf260 2.7013324964 0 83.281666425 0.6183335750000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu 
pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf261 2.27247670758 0 83.3800005 0.5199995000000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf262 3.79597347164 0 83.321666575 0.5783334250000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 
gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf263 2.98573825971 0 83.297500075 0.6024999249999979 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf264 2.6496606385 0 83.2325002 0.667499799999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu 
relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf265 3.60943759784 0 83.4424997 0.45750030000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf266 2.9593393986 0 83.5075001 0.39249989999999857 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf267 3.41172685129 0 83.2766666 0.6233334000000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf268 2.38589860172 0 83.311666475 0.5883335250000045 -1 gpu 
conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf269 2.12944547394 0 83.467499575 0.43250042499999497 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu 
relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf270 3.20776362372 0 83.172499575 0.491250637499995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf271 3.95396064036 0 83.240833525 0.6591664750000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv 
perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf272 3.92038291833 0 82.4716655 1.5425017499999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf273 2.55641477625 0 83.0358341 0.6962488499999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf274 3.38281248672 0 82.490833825 1.5137492625000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf275 3.26886223339 0 82.390833125 1.6637503124999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf276 2.88984083604 0 83.253333025 0.6466669749999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 
gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf277 2.55175275146 0 82.50333375 1.4949993750000061 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf278 2.62376471646 0 82.295832175 1.8062517374999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu 
group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf279 2.89529880947 0 82.244166875 1.8837496874999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf280 3.20641600424 0 82.4850002 1.5224996999999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf281 3.35812188872 0 82.4258341 1.6112488499999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf282 2.49677940941 0 82.71750005 1.1737499250000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise 
swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf283 3.38120370248 0 82.390834075 1.6637488874999988 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf284 2.61676165298 0 
82.824166125 1.0137508124999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 36 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf285 2.95471428151 0 83.240000175 0.6599998249999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 
gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf286 3.16800384081 0 82.459167225 1.5612491624999976 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf287 3.29072931971 0 82.5541669 1.4187496500000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 
-49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf288 3.86883458805 0 82.56500035 1.402499474999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf289 3.53914203218 0 83.42500025 0.4749997500000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 
gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf290 2.88656198137 0 83.3333337 0.5666663000000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf291 2.68828782884 0 82.304166225 1.7937506624999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 
1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf292 2.22188270191 0 83.49166645 0.40833355000000326 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf293 3.10276702124 0 82.691666975 1.212499537499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf294 3.71861561279 0 81.845 2.4825000000000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 
-58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf295 2.31264838619 0 81.90916705 2.3862494250000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf296 4.65131508141 0 81.721666225 2.6675006624999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 
-38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf297 4.41041653795 0 81.8041672 2.5437492000000077 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf298 4.64777037234 0 81.7791666 2.5812501000000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 
1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf299 2.54362112272 0 81.86583315 2.4512502749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax 
fp16 1 ------ -+++++ -conf300 3.9092714242 0 81.80166725 2.5474991250000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf301 4.71458655761 0 81.76500005 2.6024999250000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv 
fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf302 2.99958912291 0 82.183333025 1.975000462500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf303 4.15215580161 0 81.7650006 2.60249910000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 
-45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf304 3.08724297596 0 82.7725004 1.0912494000000024 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf305 4.55102318011 0 81.726667575 2.65999863750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf306 4.05237118816 0 81.7183329 2.6725006500000106 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf307 3.65495824565 0 81.864166075 2.4537508875 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 
gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf308 3.58660356691 0 83.1533337 0.5199994499999931 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 
1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf309 4.15683977316 0 81.81250045 2.5312493249999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf310 2.2957876366 0 81.892500625 2.411249062500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 
1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf311 4.06936164451 0 81.83166655 2.5025001750000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf312 4.16443452935 0 81.8549995 2.467500749999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf313 4.12196594403 0 81.826667225 2.5099991625000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf314 2.80762145408 0 81.9374994 2.3437509000000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf315 3.77961048884 0 82.096666975 2.1049995374999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 
1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf316 3.8515884894 0 81.816667425 2.5249988624999915 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf317 4.19486603401 0 81.712499225 2.681251162499997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf318 4.0553073343 0 81.748333075 2.6275003874999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf319 3.4554349638 0 83.5124993 0.38750069999999825 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm 
fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf320 4.47851561355 0 81.7700007 2.5949989500000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf321 3.5209555481 0 83.0466657 0.6800014499999918 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 
1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf322 4.66740793088 0 82.42833425 1.607498624999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 
gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf323 3.0015015591 0 83.6899995 0.21000050000000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf324 2.47501547765 0 83.31749875 0.5825012500000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf325 3.5212084815 0 83.172499875 0.4912501875000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf326 3.50606233828 0 83.2108336 0.6891663999999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 
-42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf327 2.92583901753 0 83.28500025 0.6149997500000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf328 3.40109920082 0 83.1758331 0.4862503499999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf329 3.56759398638 0 83.21000045 0.6899995500000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf330 3.55806857582 0 83.169166525 
0.49625021250000856 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf331 3.43688168131 0 83.131667175 0.552499237499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu 
conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf332 2.12603509822 0 83.31166615 0.5883338500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf333 3.34049231646 0 83.560832775 0.3391672250000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu 
batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf334 3.1381527329 0 83.294165025 0.6058349750000019 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf335 3.30692068622 0 83.186666675 0.46999998750000316 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 
gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf336 2.55450450958 0 83.429165825 0.4708341750000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf337 3.2983460176 0 83.409999525 0.49000047499999655 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 
gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf338 2.40445977697 0 83.72166715 0.1783328499999982 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 
promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf339 2.47649388334 0 83.3458338 0.5541662000000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf340 2.80986450982 0 83.6108329 0.28916709999999457 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu 
batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf341 3.19996945711 0 82.20916635 1.9362504749999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf342 3.15022966077 0 82.3558336 1.7162496000000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu 
fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf343 3.18630666863 0 82.309166375 1.7862504374999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf344 3.09659686575 0 82.7841664 1.0737503999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 
gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf345 2.98635440336 0 82.5666672 1.3999992000000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu 
relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf346 2.96599083939 0 83.289166375 0.6108336250000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf347 3.31764009092 0 83.229165675 0.6708343249999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf348 3.24530593811 0 82.744166375 1.133750437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf349 3.1672312347 0 82.32083345 1.7687498250000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 
-43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf350 3.35194019608 0 82.245000875 1.8824986874999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf351 3.0778774074 0 82.307499275 1.7887510875000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 
gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf352 3.33260277956 0 83.4599995 0.440000500000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf353 3.24103387077 0 82.2833328 1.825000800000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf354 3.18069340099 0 82.5133333 1.480000050000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 
-71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf355 3.07429636563 0 82.3416668 1.737499800000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf356 3.15235879862 0 82.336666425 1.7450003624999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu 
fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf357 3.24530593811 0 82.67416655 1.238750175000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf358 4.76954246445 0 82.019166125 2.221250812500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise 
swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf359 4.449901633 0 81.9024998 2.3962502999999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf360 4.34207312855 0 82.398333475 1.6524997874999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 
gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf361 3.91922383284 0 82.130833025 2.0537504624999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf362 4.15854174264 0 82.52416725 1.4637491249999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf363 3.96230081862 0 82.055000075 2.167499887500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu 
relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf364 2.79832270858 0 82.564165975 1.4037510375000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf365 3.3989410127 0 81.7099991 2.685001349999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf366 3.76110445166 0 82.480833025 1.5287504625000068 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf367 5.19355302987 0 81.9524996 2.321250600000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm 
fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf368 3.31882787728 0 82.1508331 2.0237503500000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu 
softmax fp16 1 ------ -+++++ -conf369 4.96680102787 0 82.078333275 2.1325000874999915 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf370 2.24298393464 0 82.670833925 1.243749112500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 
-65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf371 5.43694808897 0 82.025000375 2.212499437499993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf372 4.67197908065 0 83.112500825 0.5812487625000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm 
fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf373 5.98685011161 0 82.0716662 2.1425007000000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf374 4.95751357757 0 82.541666875 1.4374996874999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf375 4.7705434053 0 82.6366661 1.295000850000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf376 6.11253055706 0 81.839166625 2.4912500624999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf377 5.13531236708 0 81.947499425 2.3287508624999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf378 5.68419530524 0 81.885832675 2.4212509874999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf379 3.3989410127 0 82.595833525 1.356249712499995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 
gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf380 3.3989410127 0 82.13083365 2.0537495250000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf381 5.26587015 0 82.03000015 2.2049997749999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 
1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf382 5.8542348193 0 81.706667125 2.6899993125000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf383 3.12871807395 0 81.815833525 2.5262497124999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu 
group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf384 6.01904155181 0 81.811666275 2.53250058750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv 
fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf385 4.20358583892 0 81.661666975 2.7574995374999958 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf386 3.91060095759 0 82.50666635 1.490000474999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm 
fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf387 3.75751334685 0 82.062499925 2.156250112500004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf388 6.07933410004 0 82.0766671 2.134999350000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 
gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf389 5.73440892644 0 81.7983341 2.552498849999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf390 2.11827448839 0 82.49083305 1.5137504249999907 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 
-16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf391 5.11910106906 0 82.3866668 1.6699997999999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu 
pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf392 3.3989410127 0 82.1033331 2.0950003499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf393 3.25823638691 0 82.740000525 1.1399992125000011 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf394 2.87183708038 0 82.43166675 1.6024998749999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf395 6.09920856411 0 81.79083275 2.5637508749999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu 
fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf396 5.05299439803 0 81.874166875 2.4387496874999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf397 5.55984761608 0 81.838333975 2.4924990375000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf398 6.20896956368 0 81.764166375 2.603750437499997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf399 2.08997085298 0 83.324998925 
0.5750010749999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf400 2.67909425977 0 83.4074999 0.4925000999999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 
gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf401 3.39279520729 0 83.512500375 0.3874996249999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf402 3.74096285582 0 83.342499875 0.5575001249999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu 
relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf403 2.72520189649 0 83.285000675 0.6149993249999938 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf404 2.47020919834 0 83.485832825 0.4141671749999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu 
group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf405 3.52973327747 0 83.579999725 0.3200002750000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf406 3.16324896856 0 83.229167325 0.6708326749999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 
gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf407 3.53910737775 0 83.537499625 0.36250037500000476 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf408 2.46717848922 0 83.366667525 0.5333324750000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf409 3.14520849175 0 83.46833325 0.43166674999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 
-54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf410 3.73983800903 0 83.396666425 0.5033335749999935 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf411 2.69718545933 0 83.42249985 0.4775001500000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu 
group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf412 3.45563746073 0 83.18083305 0.4787504249999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf413 3.81594846135 0 83.05333355 0.6699996749999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu 
batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf414 3.22592627458 0 83.51083305 0.3891669499999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm 
fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf415 3.32121276575 0 83.50249925 0.3975007500000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf416 2.50486697002 0 83.28333315 0.6166668499999958 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 
promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf417 3.43033061199 0 83.376665675 0.5233343250000019 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf418 3.55185414537 0 83.323333775 0.5766662250000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv 
fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf419 3.45180134988 0 83.13416695 0.5487495750000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf420 2.51737484435 0 83.319999875 0.5800001250000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 
gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf421 2.47649388334 0 83.281667075 0.6183329249999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf422 3.52973327747 0 83.4341669 
0.4658331000000061 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf423 3.30543425366 0 83.2149995 0.6850004999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv 
fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf424 3.69546000476 0 83.3700008 0.5299992000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf425 2.54866692533 0 83.23500055 0.6649994500000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm 
fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf426 2.52414632919 0 83.30499955 0.5950004499999949 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf427 3.67819030212 0 83.410000125 0.48999987500000375 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu 
relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf428 3.66653335987 0 83.6833336 0.2166664000000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf429 2.5369425715 0 83.452499925 0.4475000750000021 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 
1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf430 2.47501547765 0 83.296666925 0.6033330750000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf431 2.73512859106 0 83.1441661 0.5337508499999899 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf432 3.73049052619 0 83.43333345 0.4666665499999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu 
relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf433 3.11116327032 0 83.4500002 0.44999979999999484 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf434 3.59288986667 0 83.6958339 0.20416610000000335 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 
-35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf435 3.69363094479 0 83.466666825 0.4333331749999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf436 2.44405144164 0 83.6624992 0.23750080000000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf437 2.52414632919 0 83.298332625 0.6016673749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu 
fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf438 3.43724518747 0 83.469165875 0.4308341249999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf439 2.47403085411 0 83.32166695 0.5783330500000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise 
swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf440 3.10613698764 0 83.718333025 0.18166697499999318 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf441 3.43696375659 0 83.7041672 0.19583279999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 
gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf442 2.30377764101 0 83.07916665 0.6312500249999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf443 3.86195191894 0 82.57083345 1.3937498250000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf444 3.42141563349 0 82.7016671 1.1974993500000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf445 3.18878419794 0 82.9391669 0.8412496499999946 -1 gpu conv 
fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf446 3.95857024721 0 83.29499975 0.6050002499999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu 
fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf447 3.42141563349 0 82.6366663 1.2950005499999975 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf448 3.7537079845 0 82.355000175 1.717499737499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu 
batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf449 3.89976572994 0 82.8266675 1.0099987500000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf450 3.70562610654 0 82.665833675 1.2512494874999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu 
relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf451 3.88737706866 0 82.85666655 0.9650001750000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf452 4.09696503312 0 83.220833025 0.6791669749999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu 
group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf453 4.11156497342 0 82.35666615 1.7150007750000071 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 
-77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf454 3.8410452777 0 83.389999125 0.5100008749999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf455 3.71389879516 0 82.65583325 1.2662501249999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu 
relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf456 3.12961658197 0 82.4508332 1.573750199999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf457 3.54734999035 0 83.430000575 0.469999425000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf458 3.86395152513 0 82.4875004 1.5187493999999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf459 2.89974302229 0 82.382499875 1.6762501875000098 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf460 3.48886042646 0 82.7250005 1.1624992500000104 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf461 4.09292044776 0 83.33333365 0.5666663499999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf462 3.97558461307 0 82.384999075 1.6725013875000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 
1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf463 4.09022849113 0 83.3025002 0.5974998000000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf464 3.24530593811 0 82.56083355 1.4087496750000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf465 3.78717886042 0 82.84083325 0.9887501249999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf466 3.93879714412 0 82.830000125 1.004999812500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu 
fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf467 4.10835433149 0 83.284166175 0.6158338250000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf468 3.90747388907 0 83.33583325 0.5641667500000068 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf469 2.53859661959 0 82.6516662 1.272500700000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu 
batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf470 4.12303570384 0 82.400000275 1.6499995875000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf471 2.4825808753 0 83.890000725 0.009999275000006969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu 
fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf472 4.03091892409 0 83.015833475 0.7262497875000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf473 3.94573901698 0 82.7941671 1.0587493500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 
-32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf474 3.81804847244 0 82.634166775 1.2987498375000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf475 3.51197806787 0 82.906666375 0.8900004374999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 
1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf476 3.35148515003 0 82.644166625 1.2837500625000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 
promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf477 3.64680056168 0 82.7950001 1.0574998500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf478 3.72000418322 0 83.456667675 0.4433323249999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu 
batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf479 4.13277783134 0 82.530832425 1.4537513625000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf480 4.26183413039 0 82.714165825 1.1787512625000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf481 3.23620317817 0 83.55666655 0.34333344999999726 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf482 3.42306292045 0 82.685000625 1.222499062499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 
gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf483 4.61790687055 0 82.47083265 1.5437510249999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ 
-+++++ -conf484 3.80271470043 0 82.503333875 1.4949991875000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf485 4.0096141492 0 82.503333175 1.4950002375000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu 
batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf486 3.85955229037 0 82.624166675 1.3137499875000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf487 3.12961658197 0 82.385832425 1.6712513624999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 
gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf488 4.74816504674 0 82.915833575 0.8762496375000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf489 2.33564385687 0 82.100833175 2.098750237499992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 
1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf490 6.31231852253 0 81.88666685 2.419999725000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf491 5.91056159856 0 82.027500225 2.2087496625000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf492 4.09026452693 0 81.93333315 2.3500002750000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf493 2.29057910951 0 82.68499995 1.2225000749999921 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf494 3.31826782794 0 81.898333475 2.4024997874999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu 
batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf495 2.47740126923 0 81.938332325 2.342501512499993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf496 4.86557422781 0 81.960833525 2.3087497125000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu 
relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf497 4.39304255921 0 82.727500175 1.1587497374999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf498 3.73256635056 0 82.05666695 2.16499957500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 
gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf499 6.20896956368 0 81.7933327 2.56000095000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf500 3.86268155745 0 81.934167525 2.34874871249999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf501 6.01904155181 0 81.819999625 2.520000562500009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 
-56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf502 6.35947216799 0 81.776667425 2.584998862500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf503 5.21477775932 0 81.994167525 2.258748712500008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 
gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf504 3.4313336476 0 82.8816661 0.9275008499999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf505 3.68328404231 0 82.05750025 2.163749624999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf506 5.92836922932 0 81.877500125 2.433749812500004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu 
fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf507 3.80663695682 0 81.9374992 2.3437511999999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf508 5.05770084008 0 82.24583295 1.88125057500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise 
swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf509 3.20777861875 0 82.08083205 2.128751925000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf510 3.45870051953 0 82.066666825 2.149999762500002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf511 5.69409031372 0 81.924167325 2.3637490125000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf512 6.06060941775 0 81.9683331 2.297500350000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 
-21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf513 4.1918323886 0 82.824166075 1.0137508875000094 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf514 5.51311465307 0 82.0116664 
2.2325004000000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf515 5.94347809389 0 81.9191666 2.3712501000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu 
fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf516 4.65273283316 0 83.041665875 0.6875011874999899 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf517 2.6302261544 0 83.335834275 0.5641657250000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 
-47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf518 3.22579199405 0 83.4974991 0.40250090000000116 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf519 3.27967368224 0 83.130833075 0.5537503875000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu 
relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf520 3.43425412049 0 83.53666685 0.3633331499999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf521 3.28200602128 0 83.10166715 0.5974992750000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 
gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf522 3.59684205595 0 83.177500325 0.48374951250000464 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise 
swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf523 3.59452470708 0 83.314166525 0.5858334749999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf524 2.46849388159 0 83.525832375 0.3741676250000069 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu 
batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf525 3.59984947477 0 83.574999275 0.3250007250000039 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf526 2.56221404472 0 83.107499125 0.5887513124999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu 
relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf527 2.47649388334 0 83.346667275 0.5533327249999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf528 3.45475224289 0 83.1041657 0.5937514500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu 
conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf529 3.14847976496 0 83.299999275 0.6000007249999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 
4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf530 3.7245329101 0 82.9900001 0.7649998499999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf531 3.62667420737 0 83.133332425 0.5500013624999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 
-60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf532 2.64505034404 0 83.412499775 0.48750022499999945 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf533 3.19448276073 0 83.324167425 0.5758325750000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 
gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf534 3.4521680168 0 83.34083295 0.5591670499999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf535 3.22348002932 0 83.42249985 0.4775001500000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu 
batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf536 2.46542270496 0 83.481667075 0.41833292499999575 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf537 2.52778795522 
0 83.486665925 0.4133340750000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf538 2.56221404472 0 83.144166975 0.5337495374999932 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu 
fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf539 3.27823297285 0 83.20333445 0.6966655500000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf540 3.6180742183 0 82.60750005 1.338749925000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv 
fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf541 3.92533229567 0 83.4591669 0.4408331000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf542 3.58859383862 0 82.264166475 1.8537502875000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 
gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf543 3.22297604526 0 82.75916615 1.111250775000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf544 3.66372023461 0 82.59166585 1.3625012250000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv 
fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf545 3.91002002291 0 82.5483341 1.427498849999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf546 3.96917380416 0 82.402500725 1.6462489125000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf547 3.95854211657 0 82.256666125 1.865000812500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu 
relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf548 3.66981520647 0 82.52583375 1.4612493749999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf549 3.8906919752 0 82.56250095 1.4062485749999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu 
group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf550 3.80542946014 0 82.589166625 1.3662500624999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf551 4.01038027961 0 83.020833375 0.7187499375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 
-14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf552 3.54824634447 0 82.628332675 1.3075009875000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu 
batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf553 3.99739578291 0 82.5425003 1.4362495499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf554 3.42612797341 0 82.64000035 1.289999475000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu 
fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf555 3.37950116826 0 82.5966667 1.3549999499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf556 3.85260352333 0 82.516667575 1.4749986375000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 
-41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf557 3.70250486116 0 83.482500475 0.4174995250000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf558 3.85830895124 0 82.7925002 1.0612496999999905 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 
1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf559 3.08261368468 0 82.583333825 1.3749992625000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf560 3.84859290112 0 82.804166725 
1.0437499124999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf561 3.88910440715 0 82.49000105 1.5149984249999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 
-68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf562 3.77712344616 0 82.583332925 1.3750006125000098 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf563 3.3516889331 0 82.640000425 1.289999362500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 
-48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf564 3.24530593811 0 82.568332375 1.3975014375000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf565 3.76898427543 0 82.60916635 1.336250475000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu 
group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf566 3.44754596993 0 82.272499425 1.8412508624999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf567 4.05924378827 0 82.56500135 1.4024979749999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 
gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf568 3.93740416705 0 82.522498825 1.4662517625000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu 
batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf569 3.84132198203 0 82.5683344 1.3974984000000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf570 3.09230255687 0 82.6708328 1.2437508000000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu 
fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf571 3.7426849711 0 82.5908331 1.3637503500000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf572 2.86454312858 0 83.1341669 0.5487496500000049 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 
-35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf573 3.8409412107 0 82.574167475 1.3887487875000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf574 3.42777980223 0 82.648334175 1.2774987374999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 
gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf575 3.40403815603 0 83.51166655 0.38833344999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu 
pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf576 3.88047602719 0 82.532500075 1.4512498874999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf577 3.77830204078 0 82.647500475 1.2787492874999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf578 3.42777980223 0 82.63249865 1.3012520249999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf579 4.04812478529 0 82.325000375 1.7624994374999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu 
relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf580 3.42612797341 0 82.567500125 1.3987498125000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf581 2.71103437454 0 82.644166375 1.2837504375000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv 
fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf582 2.71585198734 0 82.609166725 1.3362499125000085 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf583 3.94925800299 0 82.51000045 1.4849993249999898 -1 gpu conv fp16 1 -2 gpu batchnorm 
fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf584 5.94686699811 0 81.866666425 2.450000362499992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu 
relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf585 6.08581936049 0 81.75666715 2.6149992750000024 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf586 5.8159977702 0 81.991667725 2.262498412500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 
promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf587 5.84038818508 0 81.9858334 2.2712498999999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf588 3.16200541504 0 81.93916565 2.34125152499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 
-29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf589 6.13324261561 0 82.027500175 2.2087497375 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf590 3.40390894839 0 82.398333225 1.6525001624999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 
1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf591 5.96588342505 0 81.875832675 2.436250987500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu 
relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf592 6.06542582931 0 81.75916635 2.6112504749999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf593 3.75822433713 0 82.285832875 1.8212506875000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 
-55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf594 4.83568092525 0 82.5116665 1.482500249999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf595 2.92350480095 0 82.088333675 2.117499487499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf596 4.86061160899 0 82.13083405 2.0537489249999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf597 5.80915645539 0 82.07333335 2.139999975000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 
-15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf598 4.77219926546 0 82.145833575 2.0312496375000038 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu 
relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf599 4.95642590255 0 82.177500425 1.9837493624999922 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf600 6.3125953848 0 81.8175009 2.5237486500000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 
promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf601 5.68676212758 0 81.9325008 2.3512488000000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf602 4.87802723389 0 82.616667025 1.3249994625000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf603 4.24152951084 0 82.3408333 1.7387500500000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf604 4.86742481345 0 81.79666635 2.555000475000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu 
batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf605 2.84079562042 0 81.7549991 2.6175013499999906 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf606 4.7209030777 0 
82.50916645 1.4862503250000074 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf607 3.21429793651 0 82.037499825 2.1937502625000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 
gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf608 6.20896956368 0 81.806666375 2.540000437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf609 6.11253055706 0 81.9508336 2.3237496000000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu 
fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf610 5.72814889622 0 82.070833225 2.143750162499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf611 6.03912384738 0 81.9508329 2.323750650000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf612 4.82854259452 0 81.999166275 2.25125058750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf613 6.28005303148 0 81.999167275 2.2512490874999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf614 3.53322652378 0 81.8325003 2.50124954999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu 
batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf615 5.54554314448 0 82.005833575 2.2412496375000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf616 5.92460400808 0 81.8916666 2.4125001000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm 
fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf617 6.24638811174 0 81.81916665 2.5212500250000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf618 3.60668252472 0 82.4383338 1.5924993000000072 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 
gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf619 5.00597538776 0 82.5850009 1.3724986500000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf620 3.50178718632 0 82.0274992 2.208751200000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv 
fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf621 5.41498979223 0 81.926666575 2.360000137500002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv 
fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf622 6.12963802972 0 81.939999775 2.340000337499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf623 2.84806440183 0 82.35833275 1.7125008749999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm 
fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf624 4.53047135975 0 82.752499075 1.1212513874999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf625 6.45517159164 0 81.904999375 2.392500937499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 
-37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf626 2.77533156099 0 83.51249885 0.38750114999999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf627 2.67681442383 0 83.54249975 0.3575002499999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 
gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf628 3.34131522534 0 83.388333375 0.511666624999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf629 3.91196048681 0 83.385832775 0.5141672250000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf630 3.67732734091 0 83.6216665 0.27833349999999657 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 
-64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf631 3.17642385621 0 83.9708332 0.22916679999999873 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf632 3.7148603211 0 83.47666625 0.42333375000000617 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu 
batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf633 3.550044405 0 83.379166975 0.5208330249999961 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf634 3.89833699237 0 83.465000775 0.4349992250000071 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu 
relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf635 3.41246964545 0 83.380834025 0.5191659750000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf636 2.74258180683 0 83.38166735 0.5183326499999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 
1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf637 3.19236717145 0 83.629166025 0.2708339749999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu 
batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf638 2.66820276722 0 83.504999725 0.39500027499999535 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf639 3.91196048681 0 83.510833025 0.3891669750000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu 
fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf640 3.6987123875 0 83.473332775 0.4266672249999971 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf641 2.6908653869 0 83.450833775 0.44916622499999337 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf642 3.21949064499 0 83.1749996 0.48750059999998996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf643 3.8540168116 0 83.3374999 0.5625001000000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 
gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf644 3.56007386967 0 83.6800005 0.21999949999999446 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu 
fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf645 3.33812673629 0 83.398332625 0.5016673750000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf646 4.04048626029 0 83.339167475 0.5608325250000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv 
fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf647 3.85170222236 0 83.446665375 0.4533346250000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf648 3.90854569412 0 83.439166475 0.46083352500000674 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu 
batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf649 3.19820449438 0 83.620000625 0.27999937499999705 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf650 3.56169130222 0 83.4466666 0.45333339999999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 
gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf651 2.47649388334 0 83.3058326 0.5941673999999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu 
softmax fp16 1 ------ -+++++ -conf652 3.84319484171 0 83.5466665 0.3533334999999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf653 3.61281589655 0 83.4683321 0.4316679000000022 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv 
fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf654 3.45781979362 0 83.395832825 0.5041671750000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf655 3.66427338342 0 83.350832525 0.5491674749999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu 
batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf656 3.73380488971 0 83.555833725 0.34416627500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf657 3.34017631976 0 83.901667075 0.29833292499999403 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm 
fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf658 3.36416579236 0 83.4666676 0.43333240000000617 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf659 3.90051037143 0 83.377498975 0.5225010250000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 
gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf660 3.42864727797 0 83.366666 0.533334000000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf661 4.00593124871 0 83.6191658 0.28083419999999537 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf662 3.19455366876 0 83.6241671 0.27583290000000604 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm 
fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf663 3.73050433946 0 83.313333525 0.5866664749999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf664 3.4587813675 0 83.399999225 0.5000007749999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu 
relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf665 3.04008546135 0 83.599166675 0.3008333249999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf666 3.78060333145 0 83.4550007 0.44499930000000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu 
group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf667 3.61962110787 0 83.460833125 0.4391668750000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 
1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf668 3.8810965693 0 83.2749996 0.625000399999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf669 3.33861347291 0 83.625000425 0.27499957500000394 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 
gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf670 2.56753588524 0 83.6249996 0.2750004000000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf671 2.66966635352 0 83.505833375 0.39416662500000543 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu 
conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf672 3.62734932603 0 83.393333 0.5066670000000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf673 2.81757910985 0 83.399999475 0.5000005249999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf674 3.90909166028 0 83.35083335 0.549166649999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 
promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf675 3.67412584098 0 83.899166975 0.0008330250000000428 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf676 3.89711553068 0 83.522500025 0.37749997500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu 
relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf677 3.62570087392 0 83.388333425 0.511666575000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf678 3.07523718155 0 82.7808333 1.0787500500000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf679 4.13425005433 0 82.42083265 1.6187510250000088 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf680 3.61960948722 0 82.8216667 1.0174999500000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 
gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf681 3.77350891881 0 83.4066669 0.4933330999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf682 3.29849329899 0 82.699167225 1.2012491625000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf683 2.53422103757 0 82.725833325 1.1612500125000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 
1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf684 3.07598772844 0 82.70083335 1.1987499750000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf685 2.9984674801 0 82.483333625 1.5249995624999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu 
relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf686 2.69034021678 0 82.911667225 0.8824991624999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf687 3.6121448006 0 82.63666585 1.295001225 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv 
fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf688 3.72756318437 0 82.55749965 1.4137505250000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf689 3.55023099238 0 82.526666675 1.459999987499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu 
batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf690 3.42777980223 0 82.684999375 1.2225009374999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 
-78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf691 4.84352798061 0 82.450834125 1.5737488124999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf692 3.76776786291 0 82.559167175 1.4112492375000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 
gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf693 4.51358276297 0 82.44000035 1.589999474999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf694 3.72458652593 0 82.6208338 1.3187493000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf695 3.76348907779 0 82.8208328 1.0187507999999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf696 3.69520391434 0 82.767499375 1.0987509375000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf697 3.66560916957 0 82.67916755 1.2312486749999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 
-84 gpu softmax fp16 1 ------ -+++++ -conf698 3.77027202063 0 82.488333725 1.517499412499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf699 3.75097348493 0 82.7166668 1.174999800000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf700 2.45929083235 0 82.681665775 1.2275013375000015 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf701 3.23452197803 0 82.6866664 1.2200004000000106 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 
-45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf702 3.78589104303 0 82.468333725 1.5474994125000094 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf703 3.67105883538 0 82.655833575 1.266249637499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf704 3.68966627876 0 82.65166675 1.272499874999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf705 3.76017408275 0 82.621666375 1.3175004374999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv 
fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf706 3.24530593811 0 82.58416615 1.3737507749999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 
1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf707 2.8309510337 0 82.354167125 1.7187493124999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf708 3.65387014178 0 82.29333365 1.8099995250000092 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv 
fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf709 3.71442465807 0 82.654166575 1.2687501374999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf710 3.14961884209 0 82.7824993 1.0762510500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm 
fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf711 2.83638434085 0 82.5541664 1.4187504000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf712 2.91601847724 0 82.573332825 1.3900007625000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu 
fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf713 3.64186373922 0 82.65666695 1.2649995749999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise 
swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf714 2.81202046932 0 82.533333575 1.4499996374999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf715 3.76108652872 0 82.46333325 1.5550001249999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm 
fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf716 3.65879322305 0 82.645832475 1.281251287499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf717 3.70045394085 0 82.521666475 1.4675002874999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 
-40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf718 3.72520627099 0 82.66583245 1.2512513250000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf719 3.24756557937 0 82.614999375 1.3275009375000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 
-20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf720 3.30885003192 0 82.649167625 1.276248562499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf721 4.87113873596 0 
82.313334075 1.7799988874999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf722 2.99807106899 0 82.5941672 1.3587491999999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu 
fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf723 2.72823552171 0 82.50916655 1.486250174999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf724 2.80778843881 0 82.6516676 1.2724986000000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu 
batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf725 3.78397139373 0 82.5358331 1.4462503499999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf726 3.54172278638 0 82.500833175 1.4987502375000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 
gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf727 3.24678797193 0 82.6966673 1.2049990499999979 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf728 3.25008734698 0 82.639999975 1.2900000375000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu 
conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf729 3.45364629658 0 82.469166625 1.5462500625000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 
-74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf730 3.51186658454 0 82.663332725 1.2550009124999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf731 3.64560643018 0 82.46166725 1.5574991249999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 
gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf732 3.56631738856 0 82.4725002 1.5412497000000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf733 4.44852035759 0 82.728333075 1.1575003875000078 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu 
fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf734 3.83632344443 0 81.790833625 2.563749562499993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf735 5.34047298453 0 81.87749975 2.4337503750000025 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 
-13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf736 5.65205072298 0 81.823333 2.515000499999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu 
fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf737 3.09454478777 0 82.189167275 1.966249087499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf738 4.66234436114 0 81.882500525 2.4262492125000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 
gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf739 4.57988807369 0 82.72749975 1.1587503749999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf740 3.14265791087 0 82.407500075 1.6387498874999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu 
batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf741 3.79743686888 0 82.55999965 1.410000525000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf742 5.37418600885 0 81.905000525 2.3924992124999918 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 
gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf743 5.04653302026 0 82.4066666 1.6400001000000088 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf744 4.13261299183 0 81.8124998 2.5312503000000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf745 5.72782322817 0 81.8375002 2.4937497000000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 
1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf746 5.05336757055 0 81.8708332 2.4437501999999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf747 5.39629361273 0 81.907500475 2.3887492874999907 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 
1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf748 4.5639539009 0 81.868333475 2.4474997875 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf749 2.48550160702 0 81.98249965 2.276250525000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 
-22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf750 3.36713317478 0 82.3433332 1.7350001999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf751 4.25330235918 0 82.050833225 
2.173750162499992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf752 4.73785735024 0 82.43083295 1.6037505750000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 
gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf753 5.75116942647 0 81.810832725 2.533750912500004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf754 4.08310313137 0 81.82083325 2.5187501249999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu 
group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf755 3.69771415222 0 81.771667375 2.5924989374999896 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf756 4.5639539009 0 81.88916665 2.4162500249999894 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 
3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf757 3.61284787083 0 81.839166675 2.491249987499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf758 3.14794852044 0 83.18249955 0.47625067499999574 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf759 3.61297523069 0 83.109166025 0.5862509625000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 
-71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf760 3.0859019816 0 83.58333355 0.31666644999999394 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf761 3.48258407644 0 83.072499225 0.6412511624999979 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 
1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf762 2.35625127316 0 83.69083365 0.20916634999999817 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf763 3.82892289873 0 83.0383339 0.6924991500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf764 3.23798952739 0 83.382500875 0.5174991249999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf765 2.46884718101 0 83.7450001 0.1549999000000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu 
batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf766 2.89946744376 0 83.59416695 0.3058330499999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 
-78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf767 3.9824491999 0 83.034165925 0.6987511125000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf768 2.26810900867 0 83.441667 0.45833300000000465 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu 
group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf769 3.33442766268 0 83.4108334 0.4891665999999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf770 3.56238424705 0 83.114167025 0.5787494624999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 
-38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf771 2.20628386166 0 83.8049997 0.09500030000000381 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf772 2.96054190359 0 83.446667125 0.4533328749999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 
-18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf773 3.73646781666 0 83.210832725 0.689167274999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu 
softmax fp16 1 ------ -+++++ -conf774 3.45655139427 0 83.3616669 0.538333099999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf775 3.36781044562 0 83.41416585 0.48583414999999663 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 
-65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf776 3.33711158829 0 83.400833525 0.4991664750000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf777 2.47649388334 0 83.294167725 0.6058322750000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 
-45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf778 3.29795938368 0 83.4608333 0.4391666999999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf779 2.58901289839 0 83.224165925 0.6758340750000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 
-25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf780 2.48110468365 0 83.166666625 0.5000000624999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf781 3.31743089602 0 83.52416585 0.3758341499999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 
gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf782 3.16532803511 0 83.4866665 0.4133335000000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv 
fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf783 3.01209146756 0 83.39499915 0.5050008499999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf784 2.91506038019 0 83.454166025 0.44583397499999367 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 
gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf785 3.28419816756 0 83.439999375 0.4600006250000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf786 3.44231905129 0 83.079999875 0.6300001875000021 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise 
swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf787 3.35346139693 0 83.4133336 0.4866663999999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf788 2.5733608853 0 83.475833225 0.42416677499999766 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm 
fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf789 3.32302239408 0 83.463333575 0.4366664250000071 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu 
relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf790 3.86085671563 0 83.331666975 0.5683330249999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf791 2.87563150959 0 83.081667125 0.6274993125000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv 
fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf792 2.2667413651 0 83.2266664 0.6733336000000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf793 3.17877719405 0 83.10166685 0.5974997249999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 
-39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf794 2.05169457405 0 83.546667075 0.353332924999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf795 3.92871521165 0 83.153333725 0.519999412500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 
-20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf796 2.871597607 0 83.05750035 0.663749475000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf797 3.51071987027 0 83.0066665 
0.7400002500000085 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf798 3.25133864889 0 82.619998725 1.3200019124999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 
-68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf799 3.02951773989 0 82.507500525 1.4887492125000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf800 3.27334017121 0 83.1308336 0.5537495999999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu 
relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf801 3.77431743295 0 82.847500075 0.9787498875000011 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf802 3.15173939185 0 82.54416675 1.4337498749999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 
1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf803 3.20342071376 0 82.911666325 0.8825005125000018 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf804 3.76589200269 0 82.63416685 1.2987497250000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf805 3.97450408852 0 82.2916664 1.8125004000000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv 
fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf806 3.72839205344 0 82.84916615 0.9762507749999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf807 3.83937218502 0 82.662499675 1.2562504874999902 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu 
relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf808 3.46318308501 0 82.660000625 1.2599990625000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf809 2.65194478828 0 82.9966669 0.7549996500000091 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf810 3.81868246747 0 82.676666625 1.2350000625000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf811 2.668045357 0 82.687500775 1.2187488374999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf812 3.6406305738 0 83.044166575 0.6837501374999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 
3 -84 gpu softmax fp16 1 ------ -+++++ -conf813 3.88869175025 0 82.619999925 1.3200001124999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf814 3.31960896231 0 82.582499525 1.3762507124999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf815 3.74455317852 0 83.011666325 0.7325005125000104 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf816 3.5137137361 0 82.635833375 1.2962499374999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm 
fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf817 3.47803508789 0 82.671666975 1.2424995375000094 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf818 3.88869175025 0 82.5324997 1.4512504499999963 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf819 3.99015746851 0 83.32999975 0.5700002500000011 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf820 3.47633448052 0 82.70916585 1.1862512249999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 
-5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf821 3.81672942872 0 82.66249985 1.2562502249999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu 
relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf822 3.68765396647 0 83.035833175 0.6962502375000099 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf823 2.62468751826 0 83.85666555 0.04333444999999758 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv 
fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf824 3.24530593811 0 82.596666725 1.3549999124999914 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf825 3.78887649269 0 82.6574997 1.2637504499999963 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm 
fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf826 3.27825362995 0 83.098333475 0.602499787499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf827 4.04464703118 0 82.658332375 1.2625014374999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv 
fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf828 4.0080827723 0 82.967499475 0.7987507875000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 
-80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf829 3.81868246747 0 82.6308328 1.3037508000000102 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf830 2.97178100595 0 82.274999925 1.8375001124999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu 
relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf831 3.6270584119 0 82.60749975 1.3387503749999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf832 4.0570665511 0 82.461666575 1.557500137500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf833 3.28457771051 0 83.106666975 0.589999537500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf834 3.82073461682 0 82.628333725 1.3074994124999932 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm 
fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf835 2.81694416161 0 82.9158347 0.8762479499999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf836 2.71225929471 0 82.91 0.8850000000000051 -1 gpu 
conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf837 3.2770273421 0 82.669999925 1.2450001125000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 
gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf838 3.34881572161 0 82.45500065 1.5674990249999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf839 3.95513111625 0 82.567500925 1.3987486124999933 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 
gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf840 3.07936522986 0 82.384166725 1.6737499125 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf841 3.50904748658 0 82.551667075 1.4224993875000038 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu 
relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf842 3.78763902744 0 82.524167675 1.4637484874999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf843 3.18013008622 0 82.7024998 1.1962503000000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 
-11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf844 4.03626127129 0 82.5291655 1.4562517499999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 
-78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf845 2.58905763401 0 82.68250105 1.2262484250000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf846 3.99304048761 0 82.58416755 1.3737486749999945 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu 
group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf847 3.99528436929 0 82.64749975 1.2787503750000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf848 3.78763902744 0 82.690000125 1.2149998125000039 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 
gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf849 3.68134508359 0 82.605832975 1.3412505375000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf850 3.9513615958 0 82.637499625 1.2937505624999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu 
relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf851 3.74332022224 0 81.716666775 2.6749998375000104 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 
1 ------ -+++++ -conf852 2.81716153181 0 82.6441663 1.2837505500000077 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf853 6.16037265169 0 81.943333075 2.3350003875000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv 
fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf854 3.37055296019 0 81.936666475 2.3450002875000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf855 4.90505757241 0 82.631667175 1.302499237499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 
1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf856 4.91731542308 0 82.125833525 2.0612497124999933 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf857 5.10882133745 0 82.5716667 1.3924999500000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm 
fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf858 5.91056159856 0 82.000000275 2.2499995874999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf859 3.59399896186 0 81.861666875 2.457499687500004 -1 gpu conv fp16 1 -2 gpu batchnorm 
fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf860 3.8438175556 0 81.89499995 2.4075000750000015 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu 
fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf861 5.10882133745 0 82.03250025 2.2012496250000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf862 3.24518027232 0 82.024166425 2.2137503624999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu 
conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf863 4.45286607582 0 82.109999625 2.0850005624999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf864 2.66745181515 0 81.925832975 2.3612505374999913 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf865 3.8438175556 0 81.93416575 2.348751374999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf866 2.75141950611 0 83.235000425 0.6649995750000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf867 5.02396619108 0 81.964998375 2.30250243750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf868 6.30850043881 0 81.839167875 2.4912481874999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf869 2.71089204705 0 82.777500525 1.0837492125000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf870 3.37792709756 0 82.785001075 1.072498387500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf871 6.20896956368 0 81.805832675 2.5412509874999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf872 6.093601141 0 81.809166125 2.5362508124999934 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf873 6.17472027945 0 81.960000325 2.3099995125000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm 
fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf874 4.37093352457 0 82.7258336 1.1612495999999979 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 
-81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf875 5.7215125746 0 81.9375002 2.3437496999999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf876 4.84163016407 0 82.084999725 2.1225004124999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu 
relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf877 6.26209688761 0 81.819166775 2.521249837500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf878 4.85663126202 0 82.5866659 1.3700011500000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 
1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf879 6.20896956368 0 81.853333725 2.4699994125000018 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf880 3.15632103776 0 83.2983335 0.6016665000000018 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv 
fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf881 3.80890289184 0 81.777500975 2.583748537500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf882 
4.31381299305 0 83.01000025 0.7349996249999933 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf883 3.79575894348 0 82.3091669 1.7862496500000091 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm 
fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf884 6.01904155181 0 81.837499775 2.4937503375000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf885 3.09816021067 0 82.4133333 1.6300000499999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu 
relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf886 2.53423568639 0 82.0749991 2.137501350000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf887 3.92665454118 0 81.950833375 2.323749937499997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 
gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf888 5.43694808897 0 81.838333275 2.492500087500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf889 5.43694808897 0 81.915832175 2.3762517374999916 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf890 3.72936814517 0 83.34833285 0.5516671499999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu 
batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf891 3.50391619891 0 83.436665475 0.46333452500000194 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf892 3.31248767312 0 83.2858336 0.6141663999999963 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu 
fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf893 3.7499123954 0 83.4508327 0.4491672999999935 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp16 1 ------ -+++++ -conf894 3.44261287135 0 83.47166675 0.4283332500000029 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise 
swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf895 3.69707003451 0 83.4391661 0.4608339000000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf896 3.51958469414 0 83.454166025 0.44583397499999367 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu 
batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf897 3.3941522599 0 83.4208333 0.47916670000000183 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm 
fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf898 2.50486697002 0 83.303333425 0.5966665749999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf899 3.69734139746 0 83.716666025 0.18333397500000503 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 
-58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf900 3.53539394872 0 83.4691667 0.43083329999999764 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf901 2.81481595232 0 83.30999995 0.5900000499999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 
gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf902 3.59517889854 0 83.304999975 0.5950000249999988 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp16 1 ------ -+++++ -conf903 2.7921225095 0 83.417500225 0.4824997750000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu 
relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf904 3.63331458464 0 83.449166125 0.45083387499999505 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 
1 ------ -+++++ -conf905 3.47299403328 0 83.55583285 0.3441671499999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf906 3.44718329478 0 83.274167675 0.625832324999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 
gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf907 3.68820488322 0 83.474166075 0.4258339250000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf908 3.54345165631 0 83.48249965 0.41750035000000596 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu 
relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf909 3.71451789948 0 83.427500375 0.4724996250000061 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf910 3.41501347545 0 83.6241676 0.2758323999999931 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise 
swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf911 2.35625127316 0 83.7183326 0.18166740000000348 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf912 3.11476765562 0 83.430000525 0.46999947500000305 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv 
fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp16 1 ------ -+++++ -conf913 2.54844128088 0 83.651667225 0.24833277500000295 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 
-72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf914 3.47560954513 0 83.185000275 0.4724995875000104 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf915 2.47649388334 0 83.309999775 0.5900002250000057 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu 
group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 ------ -+++++ -conf916 3.63349441695 0 83.54916695 0.35083305 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf917 3.30388414795 0 83.359167275 0.5408327249999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu 
batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp16 1 ------ -+++++ -conf918 3.77986574157 0 83.4441673 0.4558326999999963 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf919 3.67920746343 0 83.2683346 0.6316653999999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu 
relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp16 1 ------ -+++++ -conf920 2.70850046994 0 83.616666525 0.2833334750000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp16 1 -84 gpu softmax fp16 1 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt deleted file mode 100644 index ff7fdbf108c1cbca0154d6c300cd3ebbdaf7cd6d..0000000000000000000000000000000000000000 --- a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/autotuner_data/tuner_promise_confs_batch220_multi2.txt +++ /dev/null @@ -1,89001 +0,0 @@ -+++++ -conf1 1 0 83.5 0 -1 gpu conv fp32 1 -2 gpu batchnorm fp32 1 -3 gpu relu fp32 1 -4 gpu group_conv fp32 1 -5 gpu batchnorm fp32 1 -6 gpu relu fp32 1 -7 gpu conv fp32 1 -8 gpu batchnorm fp32 1 -9 gpu relu fp32 1 -10 gpu group_conv fp32 1 -11 gpu batchnorm fp32 1 -12 gpu relu fp32 1 -13 gpu conv fp32 1 -14 gpu batchnorm fp32 1 -15 gpu relu fp32 1 -16 gpu group_conv fp32 1 -17 gpu batchnorm fp32 1 -18 gpu relu fp32 1 -19 gpu conv fp32 1 -20 gpu batchnorm fp32 1 -21 gpu relu fp32 1 -22 gpu group_conv fp32 1 -23 gpu batchnorm fp32 1 -24 gpu relu fp32 1 -25 gpu conv fp32 1 -26 gpu batchnorm fp32 1 -27 gpu relu fp32 1 -28 gpu group_conv fp32 1 -29 gpu batchnorm fp32 1 -30 gpu relu fp32 1 -31 gpu conv fp32 1 -32 gpu batchnorm fp32 1 -33 gpu relu fp32 1 -34 gpu group_conv fp32 1 -35 gpu batchnorm fp32 1 -36 gpu relu fp32 1 -37 gpu conv fp32 1 -38 gpu batchnorm fp32 1 -39 gpu relu fp32 1 -40 gpu group_conv fp32 1 -41 gpu batchnorm fp32 1 -42 gpu relu fp32 1 -43 gpu conv fp32 1 -44 gpu batchnorm fp32 1 -45 gpu relu fp32 1 -46 gpu group_conv fp32 1 -47 gpu batchnorm fp32 1 -48 gpu relu fp32 1 -49 gpu conv fp32 1 -50 gpu batchnorm fp32 1 -51 gpu relu fp32 1 -52 gpu group_conv fp32 1 -53 gpu batchnorm fp32 1 -54 gpu relu fp32 1 -55 gpu conv fp32 1 -56 gpu batchnorm fp32 1 -57 gpu relu fp32 1 -58 gpu group_conv fp32 1 -59 gpu batchnorm fp32 1 -60 gpu relu fp32 1 -61 gpu conv fp32 1 -62 gpu batchnorm fp32 1 -63 gpu relu fp32 1 -64 gpu group_conv fp32 1 -65 gpu batchnorm fp32 1 -66 gpu relu fp32 1 -67 gpu conv fp32 1 -68 gpu batchnorm fp32 1 -69 gpu relu fp32 1 -70 gpu group_conv fp32 1 -71 gpu batchnorm fp32 1 -72 gpu relu fp32 1 -73 gpu conv fp32 1 -74 gpu batchnorm fp32 1 -75 gpu relu fp32 1 -76 gpu group_conv fp32 1 -77 gpu batchnorm fp32 1 -78 gpu relu fp32 1 -79 gpu conv fp32 1 -80 gpu batchnorm fp32 1 -81 gpu relu fp32 1 -82 gpu pool_mean fp32 1 -83 gpu mul fp32 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf1 4.15413017186 0 83.163334475 0.5049982875000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf2 2.98991537361 0 83.386665875 0.5133341249999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf3 3.88159289347 0 83.2783331 0.6216669000000025 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 
gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf4 4.14749473048 0 83.220000325 0.6799996749999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf5 4.4175200707 0 83.219999875 0.6800001250000008 -1 gpu conv fp16 1 -2 gpu 
batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf6 4.43502212401 0 83.155834675 0.5162479875000088 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 
-70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf7 4.10832403497 0 83.103333275 0.5950000875000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf8 3.31453105661 0 82.59083295 1.3637505749999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 
gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf9 3.52220799908 0 82.4716658 1.542501300000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf10 3.44814122333 0 82.51916615 1.4712507749999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 
gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf11 3.02800023045 0 82.64999965 1.275000525000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf12 3.68207420915 0 82.448332775 1.5775008375000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 
-11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf13 3.862916011 0 82.7708336 1.0937495999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 
gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf14 3.7573272945 0 82.422500075 1.6162498874999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf15 3.93066025121 0 82.594167 1.3587495000000018 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 
gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf16 3.87800695966 0 82.430000125 1.6049998124999902 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf17 3.68207420915 0 82.56333275 1.405000874999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 
1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf18 3.22097285505 0 83.564167475 0.33583252499999505 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf19 3.82500219093 0 82.9275003 0.8587495499999918 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu 
batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf20 3.90066717558 0 82.329165975 1.7562510375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf21 3.86501599073 0 83.115000925 
0.5774986124999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf22 3.40096875474 0 82.72416595 1.1637510750000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 
-68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf23 3.5538161637 0 82.9700005 0.7949992500000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf24 3.17344943111 0 83.00083265 0.74875102499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 
gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf25 2.76788477576 0 82.447499725 1.5787504124999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf26 3.92397614204 0 83.174999825 0.48750026249999934 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm 
fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf27 3.4092100078 0 83.1424999 0.5362501499999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf28 3.03961006636 0 82.704167175 1.1937492375000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu 
group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf29 3.62973730797 0 83.085833325 0.6212500125000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu 
batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf30 2.81140054286 0 82.4325003 1.6012495499999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf31 3.18575701105 0 82.52833345 1.4574998249999922 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 
gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf32 3.42595978009 0 82.7708333 1.09375004999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf33 3.17255385439 0 82.7233329 1.165000649999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf34 3.6391339197 0 82.831667325 1.002499012500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf35 2.72368244288 0 83.034168075 0.698747887500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu 
batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf36 3.44714364594 0 82.539999575 1.440000637500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf37 3.85171694927 0 83.137500575 0.5437491374999937 
-1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf38 3.02151032351 0 83.1958331 0.4562503499999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 
gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf39 3.92280583455 0 83.017499875 0.7237501875000021 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf40 4.15840004076 0 82.141666525 2.0375002124999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise 
swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf41 2.89589235375 0 82.634166725 1.2987499125 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf42 6.16453028593 0 81.9308327 2.3537509500000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu 
relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf43 4.7463107647 0 81.9616666 2.3075000999999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf44 5.84575268801 0 81.983333575 2.2749996374999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu 
group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf45 3.45773167067 0 82.297500375 1.8037494375000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 
-77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf46 4.34036485844 0 82.463333875 1.5549991874999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf47 4.75207062649 0 81.74500045 2.6324993249999906 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu 
fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf48 6.09000225926 0 81.9883331 2.2675003499999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf49 5.25532208128 0 81.92083345 2.3687498249999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 
promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf50 4.35262313423 0 82.356666825 1.7149997624999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf51 5.25144034242 0 81.9350005 2.3474992499999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf52 5.32967222406 0 81.9616671 2.3074993500000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf53 4.13210954206 0 83.056666875 0.6649996874999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf54 3.93967771859 0 81.763332525 2.6050012125000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 
1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf55 4.13872474867 0 82.631666775 1.302499837500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf56 2.7690518229 0 81.93666605 2.345000925000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf57 5.60283597265 0 81.9233338 2.364999300000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf58 4.45486021161 0 82.0608328 2.1587508 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu 
relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf59 4.22738367053 0 82.226667075 1.909999387500008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf60 6.10852785257 0 81.959167325 2.3112490125000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu 
group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf61 4.98692149992 0 81.822500025 2.516249962500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 
gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf62 6.11662384336 0 81.9808335 2.278749749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf63 5.97727027928 0 82.0224998 2.2162502999999916 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 
-51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf64 2.93382347771 0 81.85416565 2.468751525000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf65 5.54950778131 0 81.73833355 2.6424996750000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 
24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf66 6.11662384336 0 81.889999075 2.4150013874999914 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf67 3.82767121119 0 82.26583335 1.8512499750000089 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm 
fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf68 4.99324893801 0 81.816667 2.524999500000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu 
fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf69 5.74180480491 0 81.889166525 2.4162502125000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf70 2.8416794212 0 82.981666775 0.7774998375000095 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv 
fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf71 5.63492586502 0 81.993333575 2.259999637500009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf72 3.83421974764 0 83.207499825 0.6925001750000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf73 1.87482212142 0 82.811667125 1.0324993125000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf74 4.43826460769 0 81.75166725 2.6224991250000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv 
fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf75 5.03123041946 0 82.306666375 1.790000437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf76 
4.2701321542 0 82.1975002 1.9537497000000101 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf77 3.8174916299 0 83.4608337 0.4391663000000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu 
relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf78 3.54707867194 0 83.409999875 0.4900001250000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf79 3.90991089555 0 83.46999955 0.43000045000000287 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 
gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf80 3.67974499409 0 83.3833332 0.5166668000000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf81 2.82550849059 0 83.28583315 0.614166849999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise 
swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf82 2.77356236628 0 83.60833335 0.2916666500000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf83 2.54319644535 0 83.399166475 0.5008335249999988 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf84 3.73175797849 0 83.50083335 0.3991666500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm 
fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf85 3.36226524472 0 83.472500175 0.4274998250000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf86 3.00745254477 0 83.33583355 0.5641664499999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu 
group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf87 3.95482984539 0 83.289165675 0.6108343249999933 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf88 2.89889994154 0 83.269167175 0.6308328249999932 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu 
batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf89 3.60299625636 0 83.3791664 0.5208335999999975 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf90 3.13621575975 0 83.54083385 0.3591661500000015 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm 
fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf91 2.61388509814 0 83.457500225 0.44249977499999604 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf92 4.05930607617 0 83.1716666 0.492500100000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf93 3.73175797849 0 83.575000225 0.32499977500000343 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 
-59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf94 3.76274140853 0 83.47916685 0.42083315000000143 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf95 3.20332969056 0 83.850833275 0.04916672500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu 
batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf96 3.64570944225 0 83.45249935 0.4475006500000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf97 3.85035669633 0 83.5608333 0.33916670000000126 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 
gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf98 3.54829526922 0 83.25249975 0.6475002500000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ 
-conf99 3.61899339422 0 83.278334075 0.6216659249999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf100 3.28254525212 0 83.489167025 0.4108329749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 
-66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf101 3.50816224551 0 83.252499725 0.6475002749999931 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf102 4.10549146346 0 83.3416668 0.5583332000000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 
gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf103 3.36715352889 0 83.584166725 0.31583327499999714 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf104 3.1088246435 0 83.1591665 0.5112502500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise 
swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf105 3.47488338292 0 83.388333125 0.5116668749999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf106 3.59538418566 0 83.5300007 0.3699992999999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 
-5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf107 3.97286473272 0 83.537499975 0.3625000249999971 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm 
fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf108 3.8174916299 0 83.451667075 0.4483329249999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf109 3.46345463754 0 83.462500825 0.4374991749999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 
-52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf110 3.556746151 0 83.424999425 0.47500057500000425 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf111 3.96049527585 0 83.53333295 0.3666670499999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 
-32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf112 3.9715961288 0 83.32833325 0.57166675 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf113 3.93130152041 0 82.56666695 1.3999995750000025 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 
-12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf114 2.8668123125 0 82.998333125 0.7525003124999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv 
perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf115 2.64845545339 0 82.965000875 0.8024986874999982 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf116 3.14597582271 0 82.849167475 0.9762487874999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu 
relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf117 2.72482912735 0 83.4741666 0.42583339999999625 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf118 3.95103617451 0 82.3491667 1.7262499500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 
-41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf119 3.00588110745 0 83.02166615 0.7175007749999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf120 3.28000196808 0 82.762500375 1.1062494374999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 
gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf121 3.47173739276 0 82.802499825 1.0462502625000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf122 2.61621549789 0 83.197500425 0.4537493624999982 -1 gpu conv fp16 1 
-2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf123 2.95549421538 0 83.010000475 0.7349992875000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf124 3.88669230643 0 82.45333265 1.5700010250000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf125 2.83364863532 0 82.581667225 1.3774991624999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm 
fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf126 3.08002863884 0 82.504999925 1.4925001124999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf127 2.00048974491 0 82.50666635 1.490000474999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv 
fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf128 2.73765608296 0 82.255832625 1.8662510625000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf129 4.06487927094 0 83.13333375 0.5499993749999916 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 
-13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf130 4.09684344986 0 83.003333475 0.7449997874999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise 
swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf131 3.21849911232 0 82.29583385 1.806249225000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf132 2.04860322208 0 82.465833275 1.5512500875000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu 
fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf133 3.80144895722 0 82.52833315 1.4575002750000081 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf134 2.56619518427 0 82.785834275 1.0712485875000013 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf135 3.62695395201 0 82.454165975 1.5687510375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf136 3.89727045934 0 82.97500015 0.7874997750000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 
gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf137 4.12276718448 0 82.457499725 1.5637504124999921 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf138 3.60700810131 0 82.545832825 1.4312507624999924 -1 gpu conv fp16 
1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf139 4.0205364833 0 81.89083385 2.4137492249999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 
-69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf140 4.11840212461 0 81.8091669 2.536249650000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf141 3.01049479281 0 82.019999875 2.2200001875000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 
gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf142 3.85335942385 0 82.0483325 2.177501249999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf143 2.5026299742 0 81.625833925 2.8112491125000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf144 3.43886954105 0 82.726666675 1.1599999874999938 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf145 3.73017005141 0 82.89583355 0.9062496749999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 
gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf146 3.15732515345 0 81.805000075 2.542499887500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv 
fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf147 3.98632041312 0 82.07166655 2.142500174999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf148 3.80024443647 0 82.9274992 0.8587512000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 
gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf149 4.08467265051 0 82.004166025 2.2437509624999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf150 4.02990448369 0 82.30916665 1.7862500250000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 
-37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf151 3.90355228103 0 81.8025006 2.546249099999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf152 3.94843898601 0 81.8916664 2.412500399999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 
-17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf153 4.12751348406 0 81.888333875 2.417499187499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 
1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf154 4.02515780566 0 81.9924993 2.2612510499999914 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf155 3.83482666749 0 82.1449991 2.0325013499999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 
gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf156 3.8517164764 0 82.2041664 1.9437503999999919 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf157 4.10598132256 0 81.954167525 2.318748712499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu 
conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf158 3.31478397356 0 81.75083395 2.623749074999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf159 3.96623260541 0 82.226666875 1.9099996874999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm 
fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf160 4.04782353886 0 82.049166175 2.1762507375000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf161 4.02858926028 0 81.793333275 2.5600000875000077 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 
gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf162 3.00292158132 0 81.6974989 2.703751650000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf163 3.29748236022 0 82.1050001 2.092499850000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf164 4.0362576555 0 82.2466675 1.8799987499999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 
-51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf165 3.05446537337 0 82.1716669 1.992499649999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf166 3.9071658065 0 82.190832875 1.9637506875000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 
promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf167 3.93287113327 0 82.12666665 2.0600000249999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf168 4.04478954767 0 81.856666625 2.465000062499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv 
fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf169 3.61558117477 0 81.9016671 2.397499350000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 
1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf170 4.30825679247 0 81.851666825 2.472499762500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf171 3.69363094091 0 82.946667475 0.8299987875000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 
-57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf172 2.77993236963 0 82.4766672 1.5349992000000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf173 2.54145510026 0 82.7925007 1.0612489499999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf174 3.87860542119 0 82.008333625 2.237499562499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf175 3.56868994119 0 82.02499975 2.2125003749999905 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 
gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 36 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf176 2.23770347257 0 83.0258332 0.7112502000000092 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add 
fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf177 2.49861746763 0 82.984166525 0.7737502124999907 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf178 2.02993947881 0 83.30250015 0.5974998499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf179 2.20017225716 0 83.1766651 0.48500235000000913 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf180 2.71551751125 0 83.228333425 0.6716665749999976 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 
1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf181 2.06184549766 0 83.031666575 0.702500137499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf182 2.77617639439 0 82.328333125 1.7575003124999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf183 3.8694276968 0 82.7800006 1.079999100000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf184 2.34590457627 0 82.2816663 1.8275005500000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 
gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf185 4.07407440381 0 82.244165975 1.883751037499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu 
relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf186 3.11780856309 0 82.428332975 1.6075005374999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf187 3.58558835651 0 82.3399998 1.7400002999999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu 
batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf188 3.85234242953 0 82.34583265 1.7312510249999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf189 2.7074193437 0 82.325833575 1.7612496374999935 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu 
fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf190 4.05895393605 0 82.3066669 1.7899996500000057 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf191 3.78103563563 0 82.3833346 1.6749981000000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu 
batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf192 3.7929361233 0 82.3158331 1.7762503499999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 
-81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf193 2.97917012062 0 82.399166375 1.6512504375000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf194 3.70896846547 0 82.84583335 0.9812499749999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 
promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf195 3.05031465583 0 82.129166025 2.0562509624999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf196 4.76953621711 0 81.905 2.3924999999999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf197 4.82068705485 0 82.02250055 2.2162491749999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf198 3.40685460008 0 82.039999825 2.190000262500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu 
relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf199 3.7406185613 0 82.309166475 1.7862502875000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf200 3.09685498241 0 82.1758326 1.9862510999999898 
-1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf201 3.23081977958 0 82.9983336 0.7524996000000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm 
fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf202 3.96567454672 0 82.4983326 1.5025011000000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf203 4.59326226068 0 82.658333425 1.2624998625000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu 
relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf204 4.03800709024 0 82.6316668 1.3024997999999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf205 3.42928358185 0 82.031665675 2.202501487500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu 
group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf206 3.32221289747 0 82.153333675 2.0199994875000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf207 5.66794988438 0 81.89250005 2.411249925000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 
gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf208 2.71874001219 0 82.497499925 1.5037501125000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 
1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf209 3.03474416486 0 82.05833335 2.162499975000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf210 3.88176217612 0 82.449999725 1.5750004125000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv 
perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf211 3.54762785706 0 83.89833405 0.0016659499999974736 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf212 3.05039482856 0 82.7850004 1.072499399999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm 
fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf213 2.82821126308 0 82.680833325 1.228750012500008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf214 3.81477730635 0 82.135832925 2.046250612499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 
gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf215 3.39722289075 0 83.580000125 0.31999987500000204 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu 
pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf216 3.89111320826 0 82.1216669 2.067499650000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf217 3.50253383593 0 82.2108337 1.9337494500000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 
gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf218 4.05813557147 0 82.50000075 1.4999988750000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf219 3.26770542063 0 83.100000825 0.5999987625000074 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu 
batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf220 3.18828477511 0 82.7100005 1.18499924999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf221 2.71225509774 0 82.55166665 1.422500024999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv 
fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf222 3.87817450174 0 82.5324995 1.4512507499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf223 4.03339162129 0 82.4558336 1.566249599999992 -1 gpu conv fp16 1 -2 gpu 
batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf224 3.91264706835 0 82.13083465 2.0537480250000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 
gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf225 3.06837262281 0 82.53416635 1.44875047499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 26 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf226 3.69764932636 0 81.918333375 2.3724999374999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 
promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf227 2.43486575166 0 82.865833525 0.951249712500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf228 2.32359233423 0 82.09833355 2.10249967499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 
gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf229 3.42928358185 0 82.219165975 1.9212510375000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf230 3.55619894808 0 81.826667025 2.509999462499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu 
fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 21 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf231 2.86694058251 0 82.870833375 0.9437499374999945 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf232 4.88375274604 0 81.896667125 2.40499931250001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf233 3.28938021997 0 82.0241669 2.213749650000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 
-56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 30 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf234 4.39309472823 0 81.828332925 2.507500612500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf235 3.79357213589 0 83.575833175 0.3241668250000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 
-36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf236 2.12486885207 0 83.46416625 0.4358337499999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf237 3.95836341588 0 83.341666975 0.5583330250000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv 
fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf238 2.25904057642 0 83.567499 0.33250100000000204 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 
promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf239 3.39599428853 0 83.3233327 0.5766673000000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf240 2.27086134134 0 83.0875002 0.6187497000000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 
gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf241 2.25812915866 0 83.616666775 0.28333322500000124 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf242 2.12981011418 0 83.4783326 0.42166739999999836 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 
1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf243 2.57878675932 0 83.4525004 0.4474995999999948 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf244 2.61999068304 0 83.43166675 0.4683332499999949 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv 
fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf245 2.62385834639 0 83.3758336 0.5241663999999929 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf246 3.78695562862 0 83.406667325 0.4933326749999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf247 3.25056012417 0 83.410000075 0.4899999250000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm 
fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf248 2.64116522688 0 83.459999775 0.440000225 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf249 3.87266738703 0 83.2074999 0.6925000999999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu 
group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf250 3.1762968602 0 83.257500125 0.642499874999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf251 2.60635725011 0 83.481666275 0.4183337250000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 
gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf252 2.25521755755 0 83.1966667 0.45499995000000837 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf253 3.2990666889 0 83.4449997 0.45500030000000324 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 
-13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf254 2.87427851974 0 83.0825007 0.6262489500000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu 
conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf255 2.55397266535 0 83.6124996 0.28750039999999333 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf256 3.90339685542 0 83.2258328 0.6741671999999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 
1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf257 3.3158753237 0 83.366665625 0.533334375000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf258 2.71891142175 0 83.264168025 0.6358319749999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 
gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf259 2.07975389368 0 83.454165825 0.445834174999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf260 2.7013324964 0 83.281666425 0.6183335750000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 
-21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf261 2.27247670758 0 83.3800005 0.5199995000000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf262 3.79597347164 0 83.321666575 
0.5783334250000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf263 2.98573825971 0 83.297500075 0.6024999249999979 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise 
swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf264 2.6496606385 0 83.2325002 0.667499799999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf265 3.60943759784 0 83.4424997 0.45750030000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm 
fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf266 2.9593393986 0 83.5075001 0.39249989999999857 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf267 3.41172685129 0 83.2766666 0.6233334000000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu 
fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf268 2.38589860172 0 83.311666475 0.5883335250000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf269 2.12944547394 0 83.467499575 0.43250042499999497 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 
gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf270 3.20776362372 0 83.172499575 0.491250637499995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm 
fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf271 3.95396064036 0 83.240833525 0.6591664750000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf272 3.92038291833 0 82.4716655 1.5425017499999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 
-55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf273 2.55641477625 0 83.0358341 0.6962488499999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf274 3.38281248672 0 82.490833825 1.5137492625000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf275 3.26886223339 0 82.390833125 1.6637503124999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf276 2.88984083604 0 83.253333025 0.6466669749999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 
-16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf277 2.55175275146 0 82.50333375 1.4949993750000061 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu 
mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf278 2.62376471646 0 82.295832175 1.8062517374999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf279 2.89529880947 0 82.244166875 1.8837496874999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf280 3.20641600424 0 82.4850002 1.5224996999999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf281 3.35812188872 0 82.4258341 1.6112488499999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 
-45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf282 2.49677940941 0 82.71750005 1.1737499250000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf283 3.38120370248 0 82.390834075 1.6637488874999988 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 
1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf284 2.61676165298 0 82.824166125 1.0137508124999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 36 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf285 2.95471428151 0 83.240000175 0.6599998249999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 
gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf286 3.16800384081 0 82.459167225 1.5612491624999976 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 
gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf287 3.29072931971 0 82.5541669 1.4187496500000023 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf288 3.86883458805 0 82.56500035 1.402499474999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 
-55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf289 3.53914203218 0 83.42500025 0.4749997500000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf290 2.88656198137 0 83.3333337 0.5666663000000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf291 2.68828782884 0 82.304166225 1.7937506624999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf292 2.22188270191 0 83.49166645 0.40833355000000326 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 
gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 34 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf293 3.10276702124 0 82.691666975 1.212499537499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 
promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf294 3.71861561279 0 81.845 2.4825000000000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf295 2.31264838619 0 81.90916705 2.3862494250000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 
-64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf296 4.65131508141 0 81.721666225 2.6675006624999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf297 4.41041653795 0 81.8041672 2.5437492000000077 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv 
fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf298 4.64777037234 0 81.7791666 2.5812501000000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf299 2.54362112272 0 81.86583315 2.4512502749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu 
batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf300 3.9092714242 0 81.80166725 2.5474991250000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf301 4.71458655761 0 81.76500005 2.6024999250000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf302 2.99958912291 0 82.183333025 1.975000462500006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf303 4.15215580161 0 81.7650006 2.60249910000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf304 3.08724297596 0 82.7725004 1.0912494000000024 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu 
batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf305 4.55102318011 0 81.726667575 2.65999863750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf306 4.05237118816 0 81.7183329 2.6725006500000106 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 
-30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf307 3.65495824565 0 81.864166075 2.4537508875 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf308 3.58660356691 0 83.1533337 0.5199994499999931 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu 
fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf309 4.15683977316 0 81.81250045 2.5312493249999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu 
fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf310 2.2957876366 0 81.892500625 2.411249062500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf311 4.06936164451 0 81.83166655 2.5025001750000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf312 4.16443452935 0 81.8549995 2.467500749999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf313 4.12196594403 0 81.826667225 2.5099991625000087 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf314 2.80762145408 0 81.9374994 2.3437509000000105 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf315 3.77961048884 0 82.096666975 2.1049995374999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf316 3.8515884894 0 81.816667425 2.5249988624999915 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu 
relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf317 4.19486603401 0 81.712499225 2.681251162499997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf318 4.0553073343 0 81.748333075 2.6275003874999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 
-61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf319 3.4554349638 0 83.5124993 0.38750069999999825 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf320 4.47851561355 0 81.7700007 2.5949989500000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 
1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf321 3.5209555481 0 83.0466657 0.6800014499999918 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf322 4.66740793088 0 82.42833425 1.607498624999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 
gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 3 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf323 3.0015015591 0 83.6899995 0.21000050000000103 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf324 2.47501547765 0 83.31749875 
0.5825012500000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf325 3.5212084815 0 83.172499875 0.4912501875000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 
gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf326 3.50606233828 0 83.2108336 0.6891663999999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf327 2.92583901753 0 83.28500025 0.6149997500000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 
1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf328 3.40109920082 0 83.1758331 0.4862503499999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf329 3.56759398638 0 83.21000045 0.6899995500000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 
1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf330 3.55806857582 0 83.169166525 0.49625021250000856 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf331 3.43688168131 0 83.131667175 0.552499237499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 
-6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf332 2.12603509822 0 83.31166615 0.5883338500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 
promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf333 3.34049231646 0 83.560832775 0.3391672250000056 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf334 3.1381527329 0 83.294165025 0.6058349750000019 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 
-53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf335 3.30692068622 0 83.186666675 0.46999998750000316 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf336 2.55450450958 0 83.429165825 0.4708341750000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu 
batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf337 3.2983460176 0 83.409999525 0.49000047499999655 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf338 2.40445977697 0 83.72166715 0.1783328499999982 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 
-12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf339 2.47649388334 0 83.3458338 0.5541662000000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu 
fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf340 2.80986450982 0 83.6108329 0.28916709999999457 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf341 3.19996945711 0 82.20916635 1.9362504749999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 
-59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf342 3.15022966077 0 82.3558336 1.7162496000000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf343 3.18630666863 0 82.309166375 1.7862504374999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 
-39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf344 3.09659686575 0 82.7841664 1.0737503999999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf345 2.98635440336 0 82.5666672 1.3999992000000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 
gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf346 2.96599083939 0 83.289166375 0.6108336250000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf347 3.31764009092 0 83.229165675 
0.6708343249999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf348 3.24530593811 0 82.744166375 1.133750437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 
gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf349 3.1672312347 0 82.32083345 1.7687498250000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf350 3.35194019608 0 82.245000875 1.8824986874999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 
gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf351 3.0778774074 0 82.307499275 1.7887510875000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf352 3.33260277956 0 83.4599995 0.440000500000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 
1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf353 3.24103387077 0 82.2833328 1.825000800000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf354 3.18069340099 0 82.5133333 1.480000050000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu 
relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf355 3.07429636563 0 82.3416668 1.737499800000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf356 3.15235879862 0 82.336666425 1.7450003624999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf357 3.24530593811 0 82.67416655 1.238750175000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm 
fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf358 4.76954246445 0 82.019166125 2.221250812500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf359 4.449901633 0 81.9024998 2.3962502999999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu 
relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 29 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf360 4.34207312855 0 82.398333475 1.6524997874999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf361 3.91922383284 0 82.130833025 2.0537504624999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu 
group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf362 4.15854174264 0 82.52416725 1.4637491249999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean 
fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf363 3.96230081862 0 82.055000075 2.167499887500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf364 2.79832270858 0 82.564165975 1.4037510375000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf365 3.3989410127 0 81.7099991 2.685001349999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 31 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf366 3.76110445166 0 82.480833025 1.5287504625000068 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu 
fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf367 5.19355302987 0 81.9524996 2.321250600000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf368 3.31882787728 0 82.1508331 2.0237503500000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 
-23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf369 4.96680102787 0 82.078333275 2.1325000874999915 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf370 2.24298393464 0 82.670833925 1.243749112500005 -1 gpu conv fp16 1 -2 gpu 
batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf371 5.43694808897 0 82.025000375 2.212499437499993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 
gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf372 4.67197908065 0 83.112500825 0.5812487625000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf373 5.98685011161 0 82.0716662 2.1425007000000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise 
swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf374 4.95751357757 0 82.541666875 1.4374996874999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf375 4.7705434053 0 82.6366661 1.295000850000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu 
batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf376 6.11253055706 0 81.839166625 2.4912500624999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf377 5.13531236708 0 81.947499425 2.3287508624999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 
-9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf378 5.68419530524 0 81.885832675 2.4212509874999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 
-75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf379 3.3989410127 0 82.595833525 1.356249712499995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf380 3.3989410127 0 82.13083365 2.0537495250000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv 
perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf381 5.26587015 0 82.03000015 2.2049997749999903 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf382 5.8542348193 0 81.706667125 2.6899993125000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf383 3.12871807395 0 81.815833525 2.5262497124999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf384 6.01904155181 0 81.811666275 2.53250058750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf385 4.20358583892 0 81.661666975 2.7574995374999958 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 22 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu 
fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf386 3.91060095759 0 82.50666635 1.490000474999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf387 3.75751334685 0 82.062499925 2.156250112500004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise 
swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf388 6.07933410004 0 82.0766671 2.134999350000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf389 5.73440892644 0 81.7983341 2.552498849999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 
-41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf390 2.11827448839 0 82.49083305 1.5137504249999907 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf391 5.11910106906 0 82.3866668 1.6699997999999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 
gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf392 3.3989410127 0 82.1033331 2.0950003499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 32 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf393 3.25823638691 0 82.740000525 
1.1399992125000011 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf394 2.87183708038 0 82.43166675 1.6024998749999924 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 27 
-68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf395 6.09920856411 0 81.79083275 2.5637508749999895 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf396 5.05299439803 0 81.874166875 2.4387496874999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu 
batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf397 5.55984761608 0 81.838333975 2.4924990375000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf398 6.20896956368 0 81.764166375 2.603750437499997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu 
batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf399 2.08997085298 0 83.324998925 0.5750010749999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf400 2.67909425977 0 83.4074999 0.4925000999999952 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm 
fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf401 3.39279520729 0 83.512500375 0.3874996249999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise 
swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf402 3.74096285582 0 83.342499875 0.5575001249999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf403 2.72520189649 0 83.285000675 0.6149993249999938 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm 
fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf404 2.47020919834 0 83.485832825 0.4141671749999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf405 3.52973327747 0 83.579999725 0.3200002750000067 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu 
fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf406 3.16324896856 0 83.229167325 0.6708326749999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf407 3.53910737775 0 83.537499625 0.36250037500000476 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu 
conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf408 2.46717848922 0 83.366667525 0.5333324750000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise 
swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf409 3.14520849175 0 83.46833325 0.43166674999999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf410 3.73983800903 0 83.396666425 0.5033335749999935 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu 
batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf411 2.69718545933 0 83.42249985 0.4775001500000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf412 3.45563746073 0 83.18083305 0.4787504249999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu 
relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf413 3.81594846135 0 83.05333355 0.6699996749999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf414 3.22592627458 0 83.51083305 0.3891669499999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv 
fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf415 3.32121276575 0 83.50249925 0.3975007500000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf416 
2.50486697002 0 83.28333315 0.6166668499999958 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf417 3.43033061199 0 83.376665675 0.5233343250000019 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 
-66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf418 3.55185414537 0 83.323333775 0.5766662250000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf419 3.45180134988 0 83.13416695 0.5487495750000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu 
group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf420 2.51737484435 0 83.319999875 0.5800001250000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 25 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf421 2.47649388334 0 83.281667075 0.6183329249999986 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu 
batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf422 3.52973327747 0 83.4341669 0.4658331000000061 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf423 3.30543425366 0 83.2149995 0.6850004999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm 
fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf424 3.69546000476 0 83.3700008 0.5299992000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 
-73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf425 2.54866692533 0 83.23500055 0.6649994500000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf426 2.52414632919 0 83.30499955 0.5950004499999949 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv 
fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf427 3.67819030212 0 83.410000125 0.48999987500000375 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf428 3.66653335987 0 83.6833336 0.2166664000000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm 
fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf429 2.5369425715 0 83.452499925 0.4475000750000021 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf430 2.47501547765 0 83.296666925 0.6033330750000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 
1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf431 2.73512859106 0 83.1441661 0.5337508499999899 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 
promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf432 3.73049052619 0 83.43333345 0.4666665499999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf433 3.11116327032 0 83.4500002 0.44999979999999484 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm 
fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf434 3.59288986667 0 83.6958339 0.20416610000000335 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf435 3.69363094479 0 83.466666825 0.4333331749999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf436 2.44405144164 0 83.6624992 0.23750080000000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf437 2.52414632919 0 83.298332625 0.6016673749999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu 
batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf438 3.43724518747 0 83.469165875 0.4308341249999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf439 
2.47403085411 0 83.32166695 0.5783330500000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf440 3.10613698764 0 83.718333025 0.18166697499999318 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 
gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf441 3.43696375659 0 83.7041672 0.19583279999999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf442 2.30377764101 0 83.07916665 0.6312500249999928 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv 
fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 22 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf443 3.86195191894 0 82.57083345 1.3937498250000075 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf444 3.42141563349 0 82.7016671 1.1974993500000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm 
fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf445 3.18878419794 0 82.9391669 0.8412496499999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf446 3.95857024721 0 83.29499975 0.6050002499999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv 
fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf447 3.42141563349 0 82.6366663 1.2950005499999975 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm 
fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf448 3.7537079845 0 82.355000175 1.717499737499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf449 3.89976572994 0 82.8266675 1.0099987500000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu 
conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf450 3.70562610654 0 82.665833675 1.2512494874999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf451 3.88737706866 0 82.85666655 0.9650001750000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf452 4.09696503312 0 83.220833025 0.6791669749999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf453 4.11156497342 0 82.35666615 1.7150007750000071 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf454 3.8410452777 0 83.389999125 0.5100008749999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 
-82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf455 3.71389879516 0 82.65583325 1.2662501249999991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf456 3.12961658197 0 82.4508332 1.573750199999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu 
batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf457 3.54734999035 0 83.430000575 0.469999425000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf458 3.86395152513 0 82.4875004 1.5187493999999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 
gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf459 2.89974302229 0 82.382499875 1.6762501875000098 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf460 3.48886042646 0 82.7250005 1.1624992500000104 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf461 4.09292044776 0 83.33333365 0.5666663499999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf462 3.97558461307 0 82.384999075 1.6725013875000059 -1 gpu conv 
fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf463 4.09022849113 0 83.3025002 0.5974998000000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 
-69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf464 3.24530593811 0 82.56083355 1.4087496750000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf465 3.78717886042 0 82.84083325 0.9887501249999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv 
perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf466 3.93879714412 0 82.830000125 1.004999812500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf467 4.10835433149 0 83.284166175 0.6158338250000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 
1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf468 3.90747388907 0 83.33583325 0.5641667500000068 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf469 2.53859661959 0 82.6516662 1.272500700000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf470 4.12303570384 0 82.400000275 1.6499995875000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu 
group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf471 2.4825808753 0 83.890000725 0.009999275000006969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf472 4.03091892409 0 83.015833475 0.7262497875000093 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 
gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf473 3.94573901698 0 82.7941671 1.0587493500000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf474 3.81804847244 0 82.634166775 1.2987498375000044 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 6 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 
promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf475 3.51197806787 0 82.906666375 0.8900004374999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf476 3.35148515003 0 82.644166625 1.2837500625000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf477 3.64680056168 0 82.7950001 1.0574998500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 
gpu softmax fp32 1 ------ -+++++ -conf478 3.72000418322 0 83.456667675 0.4433323249999944 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf479 4.13277783134 0 82.530832425 1.4537513625000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu 
group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf480 4.26183413039 0 82.714165825 1.1787512625000076 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf481 3.23620317817 0 83.55666655 0.34333344999999726 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 
gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf482 3.42306292045 0 82.685000625 1.222499062499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf483 4.61790687055 0 82.47083265 1.5437510249999917 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu 
relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf484 3.80271470043 0 82.503333875 1.4949991875000066 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf485 4.0096141492 0 82.503333175 1.4950002375000082 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf486 3.85955229037 0 82.624166675 1.3137499875000032 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu 
group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf487 3.12961658197 0 82.385832425 1.6712513624999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf488 4.74816504674 0 82.915833575 0.8762496375000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 
-50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf489 2.33564385687 0 82.100833175 2.098750237499992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 24 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf490 6.31231852253 0 81.88666685 2.419999725000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 
1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf491 5.91056159856 0 82.027500225 2.2087496625000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf492 4.09026452693 0 81.93333315 2.3500002750000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu 
relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf493 2.29057910951 0 82.68499995 1.2225000749999921 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 25 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 25 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf494 3.31826782794 0 81.898333475 2.4024997874999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf495 2.47740126923 0 81.938332325 2.342501512499993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 22 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu 
batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf496 4.86557422781 0 81.960833525 2.3087497125000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf497 4.39304255921 0 82.727500175 1.1587497374999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm 
fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf498 3.73256635056 0 82.05666695 2.16499957500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 27 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf499 6.20896956368 0 81.7933327 2.56000095000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 
1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf500 3.86268155745 0 81.934167525 2.34874871249999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 
-82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf501 6.01904155181 0 81.819999625 2.520000562500009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf502 6.35947216799 0 81.776667425 2.584998862500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 
promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf503 5.21477775932 0 81.994167525 2.258748712500008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf504 3.4313336476 0 82.8816661 0.9275008499999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu 
group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf505 3.68328404231 0 82.05750025 2.163749624999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 28 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf506 5.92836922932 0 81.877500125 2.433749812500004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm 
fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf507 3.80663695682 0 81.9374992 2.3437511999999927 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf508 5.05770084008 0 
82.24583295 1.88125057500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf509 3.20777861875 0 82.08083205 2.128751925000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 
-67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf510 3.45870051953 0 82.066666825 2.149999762500002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf511 5.69409031372 0 81.924167325 2.3637490125000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu 
group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf512 6.06060941775 0 81.9683331 2.297500350000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf513 4.1918323886 0 82.824166075 1.0137508875000094 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 
4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 21 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf514 5.51311465307 0 82.0116664 2.2325004000000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf515 5.94347809389 0 81.9191666 2.3712501000000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf516 4.65273283316 0 83.041665875 0.6875011874999899 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm 
fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf517 2.6302261544 0 83.335834275 0.5641657250000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf518 3.22579199405 0 83.4974991 0.40250090000000116 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu 
group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf519 3.27967368224 0 83.130833075 0.5537503875000027 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf520 3.43425412049 0 83.53666685 0.3633331499999969 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu 
batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf521 3.28200602128 0 83.10166715 0.5974992750000041 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf522 3.59684205595 0 83.177500325 0.48374951250000464 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 
1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf523 3.59452470708 0 83.314166525 0.5858334749999955 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 
gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf524 2.46849388159 0 83.525832375 0.3741676250000069 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf525 3.59984947477 0 83.574999275 0.3250007250000039 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv 
fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf526 2.56221404472 0 83.107499125 0.5887513124999941 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf527 2.47649388334 0 83.346667275 0.5533327249999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf528 3.45475224289 0 83.1041657 0.5937514500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf529 3.14847976496 0 83.299999275 0.6000007249999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 
1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf530 3.7245329101 0 82.9900001 0.7649998499999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ 
-+++++ -conf531 3.62667420737 0 83.133332425 0.5500013624999909 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf532 2.64505034404 0 83.412499775 0.48750022499999945 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu 
batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf533 3.19448276073 0 83.324167425 0.5758325750000012 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf534 3.4521680168 0 83.34083295 0.5591670499999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu 
relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf535 3.22348002932 0 83.42249985 0.4775001500000059 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf536 2.46542270496 0 83.481667075 0.41833292499999575 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise 
swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf537 2.52778795522 0 83.486665925 0.4133340750000031 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf538 2.56221404472 0 83.144166975 0.5337495374999932 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv 
fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf539 3.27823297285 0 83.20333445 0.6966655500000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu 
batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf540 3.6180742183 0 82.60750005 1.338749925000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf541 3.92533229567 0 83.4591669 0.4408331000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu 
group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf542 3.58859383862 0 82.264166475 1.8537502875000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf543 3.22297604526 0 82.75916615 1.111250775000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu 
batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf544 3.66372023461 0 82.59166585 1.3625012250000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf545 3.91002002291 0 82.5483341 1.427498849999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu 
fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf546 3.96917380416 0 82.402500725 1.6462489125000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf547 3.95854211657 0 82.256666125 1.865000812500007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf548 3.66981520647 0 82.52583375 1.4612493749999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 
gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf549 3.8906919752 0 82.56250095 1.4062485749999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf550 3.80542946014 0 82.589166625 1.3662500624999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm 
fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf551 4.01038027961 0 83.020833375 0.7187499375000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf552 3.54824634447 0 82.628332675 1.3075009875000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 
1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf553 3.99739578291 0 82.5425003 1.4362495499999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 
------ -+++++ -conf554 3.42612797341 0 82.64000035 1.289999475000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf555 3.37950116826 0 82.5966667 1.3549999499999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 
1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf556 3.85260352333 0 82.516667575 1.4749986375000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf557 3.70250486116 0 83.482500475 0.4174995250000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 
-46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf558 3.85830895124 0 82.7925002 1.0612496999999905 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf559 3.08261368468 0 82.583333825 1.3749992625000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise 
swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf560 3.84859290112 0 82.804166725 1.0437499124999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf561 3.88910440715 0 82.49000105 1.5149984249999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu 
batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf562 3.77712344616 0 82.583332925 1.3750006125000098 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu 
relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf563 3.3516889331 0 82.640000425 1.289999362500005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf564 3.24530593811 0 82.568332375 1.3975014375000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv 
fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf565 3.76898427543 0 82.60916635 1.336250475000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf566 3.44754596993 0 82.272499425 1.8412508624999901 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm 
fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf567 4.05924378827 0 82.56500135 1.4024979749999957 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf568 3.93740416705 0 82.522498825 1.4662517625000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu 
fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf569 3.84132198203 0 82.5683344 1.3974984000000035 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 
-79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf570 3.09230255687 0 82.6708328 1.2437508000000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf571 3.7426849711 0 82.5908331 1.3637503500000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu 
batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf572 2.86454312858 0 83.1341669 0.5487496500000049 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf573 3.8409412107 0 82.574167475 1.3887487875000062 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 
-40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf574 3.42777980223 0 82.648334175 1.2774987374999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf575 3.40403815603 0 83.51166655 0.38833344999999897 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 
gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 23 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf576 3.88047602719 0 82.532500075 1.4512498874999977 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf577 3.77830204078 0 
82.647500475 1.2787492874999984 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf578 3.42777980223 0 82.63249865 1.3012520249999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 
1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf579 4.04812478529 0 82.325000375 1.7624994374999972 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf580 3.42612797341 0 82.567500125 1.3987498125000073 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu 
batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf581 2.71103437454 0 82.644166375 1.2837504375000037 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf582 2.71585198734 0 82.609166725 1.3362499125000085 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 
-28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf583 3.94925800299 0 82.51000045 1.4849993249999898 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf584 5.94686699811 0 81.866666425 2.450000362499992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 
1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf585 6.08581936049 0 81.75666715 2.6149992750000024 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 6 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 
-74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf586 5.8159977702 0 81.991667725 2.262498412500001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf587 5.84038818508 0 81.9858334 2.2712498999999937 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm 
fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf588 3.16200541504 0 81.93916565 2.34125152499999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 24 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 4 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 28 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf589 6.13324261561 0 82.027500175 2.2087497375 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 
-34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf590 3.40390894839 0 82.398333225 1.6525001624999973 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf591 5.96588342505 0 81.875832675 2.436250987500003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 
gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf592 6.06542582931 0 81.75916635 2.6112504749999985 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 
gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf593 3.75822433713 0 82.285832875 1.8212506875000045 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 3 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 21 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf594 4.83568092525 0 82.5116665 1.482500249999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu 
relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf595 2.92350480095 0 82.088333675 2.117499487499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv perf 23 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 4 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf596 4.86061160899 0 82.13083405 2.0537489249999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 
gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf597 5.80915645539 0 82.07333335 2.139999975000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf598 4.77219926546 0 82.145833575 2.0312496375000038 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 
gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf599 4.95642590255 0 82.177500425 1.9837493624999922 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf600 
6.3125953848 0 81.8175009 2.5237486500000017 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf601 5.68676212758 0 81.9325008 2.3512488000000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu 
batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf602 4.87802723389 0 82.616667025 1.3249994625000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf603 4.24152951084 0 82.3408333 1.7387500500000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu 
relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf604 4.86742481345 0 81.79666635 2.555000475000007 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 3 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf605 2.84079562042 0 81.7549991 2.6175013499999906 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 
gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 24 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf606 4.7209030777 0 82.50916645 1.4862503250000074 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf607 3.21429793651 0 82.037499825 2.1937502625000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 
1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 7 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf608 6.20896956368 0 81.806666375 2.540000437499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 
gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf609 6.11253055706 0 81.9508336 2.3237496000000064 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf610 5.72814889622 0 82.070833225 2.143750162499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu 
batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf611 6.03912384738 0 81.9508329 2.323750650000008 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf612 4.82854259452 0 81.999166275 2.25125058750001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 
gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf613 6.28005303148 0 81.999167275 2.2512490874999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf614 3.53322652378 0 81.8325003 2.50124954999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 
-10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf615 5.54554314448 0 82.005833575 2.2412496375000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf616 5.92460400808 0 81.8916666 2.4125001000000097 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 7 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf617 6.24638811174 0 81.81916665 2.5212500250000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv 
perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf618 3.60668252472 0 82.4383338 1.5924993000000072 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf619 5.00597538776 0 82.5850009 1.3724986500000043 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf620 3.50178718632 0 82.0274992 2.208751200000009 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf621 5.41498979223 0 81.926666575 2.360000137500002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 5 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 5 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf622 6.12963802972 0 81.939999775 2.340000337499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 6 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu 
relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf623 2.84806440183 0 82.35833275 1.7125008749999964 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 21 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 22 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 34 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf624 4.53047135975 0 82.752499075 1.1212513874999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 
gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf625 6.45517159164 0 81.904999375 2.392500937499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 4 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 promise swing_level 4 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 22 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf626 2.77533156099 0 83.51249885 0.38750114999999996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu 
relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf627 2.67681442383 0 83.54249975 0.3575002499999954 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf628 3.34131522534 0 83.388333375 0.511666624999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf629 3.91196048681 0 83.385832775 0.5141672250000028 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf630 3.67732734091 0 83.6216665 0.27833349999999657 -1 
gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf631 3.17642385621 0 83.9708332 0.22916679999999873 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 
-69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf632 3.7148603211 0 83.47666625 0.42333375000000617 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf633 3.550044405 0 83.379166975 0.5208330249999961 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu 
conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 4 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf634 3.89833699237 0 83.465000775 0.4349992250000071 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf635 3.41246964545 0 83.380834025 0.5191659750000014 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 
gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf636 2.74258180683 0 83.38166735 0.5183326499999993 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf637 3.19236717145 0 83.629166025 0.2708339749999965 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 
-9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf638 2.66820276722 0 83.504999725 0.39500027499999535 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 
gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf639 3.91196048681 0 83.510833025 0.3891669750000034 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf640 3.6987123875 0 83.473332775 0.4266672249999971 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 7 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf641 2.6908653869 0 83.450833775 0.44916622499999337 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 5 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf642 3.21949064499 0 83.1749996 0.48750059999998996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 6 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 
1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf643 3.8540168116 0 83.3374999 0.5625001000000026 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 24 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf644 3.56007386967 0 83.6800005 0.21999949999999446 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu 
group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 25 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf645 3.33812673629 0 83.398332625 0.5016673750000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 
1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf646 4.04048626029 0 83.339167475 0.5608325250000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf647 3.85170222236 0 83.446665375 0.4533346250000051 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 
-63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf648 3.90854569412 0 83.439166475 0.46083352500000674 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf649 3.19820449438 0 83.620000625 0.27999937499999705 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu 
fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf650 3.56169130222 0 83.4466666 0.45333339999999966 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf651 2.47649388334 0 83.3058326 0.5941673999999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 
-23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 promise swing_level 7 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf652 3.84319484171 0 83.5466665 0.3533334999999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf653 3.61281589655 0 83.4683321 0.4316679000000022 -1 gpu conv fp16 1 -2 
gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf654 3.45781979362 0 83.395832825 0.5041671750000006 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 
-69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf655 3.66427338342 0 83.350832525 0.5491674749999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 7 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf656 3.73380488971 0 83.555833725 0.34416627500000063 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 
-49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf657 3.34017631976 0 83.901667075 0.29833292499999403 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf658 3.36416579236 0 83.4666676 0.43333240000000617 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv 
fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf659 3.90051037143 0 83.377498975 0.5225010250000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf660 3.42864727797 0 83.366666 0.533334000000005 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm 
fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf661 4.00593124871 0 83.6191658 0.28083419999999537 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 24 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu 
relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf662 3.19455366876 0 83.6241671 0.27583290000000604 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf663 3.73050433946 0 83.313333525 0.5866664749999956 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 
6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf664 3.4587813675 0 83.399999225 0.5000007749999981 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf665 3.04008546135 0 83.599166675 0.3008333249999936 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu 
batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf666 3.78060333145 0 83.4550007 0.44499930000000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf667 3.61962110787 0 83.460833125 0.4391668750000065 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu 
relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf668 3.8810965693 0 83.2749996 0.625000399999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu 
fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf669 3.33861347291 0 83.625000425 0.27499957500000394 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf670 2.56753588524 0 83.6249996 0.2750004000000047 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 
gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf671 2.66966635352 0 83.505833375 0.39416662500000543 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 3 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf672 3.62734932603 0 83.393333 0.5066670000000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 
gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf673 2.81757910985 0 83.399999475 0.5000005249999987 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf674 3.90909166028 0 83.35083335 0.549166649999998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 
gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf675 3.67412584098 0 83.899166975 0.0008330250000000428 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv fp16 1 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 6 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf676 3.89711553068 0 83.522500025 0.37749997500000065 
-1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 7 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 6 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf677 3.62570087392 0 83.388333425 0.511666575000001 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu 
batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf678 3.07523718155 0 82.7808333 1.0787500500000036 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 25 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf679 4.13425005433 0 82.42083265 1.6187510250000088 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 
-49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf680 3.61960948722 0 82.8216667 1.0174999500000084 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf681 3.77350891881 0 83.4066669 0.4933330999999953 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 
1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf682 3.29849329899 0 82.699167225 1.2012491625000052 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf683 2.53422103757 0 82.725833325 1.1612500125000054 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm 
fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 35 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf684 3.07598772844 0 82.70083335 1.1987499750000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 3 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu 
fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf685 2.9984674801 0 82.483333625 1.5249995624999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 4 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf686 2.69034021678 0 82.911667225 0.8824991624999967 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 
gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 gpu mul fp16 1 add fp32 1 -84 gpu softmax fp32 1 ------ -+++++ -conf687 3.6121448006 0 82.63666585 1.295001225 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 7 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf688 3.72756318437 0 82.55749965 1.4137505250000046 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 
gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf689 3.55023099238 0 82.526666675 1.459999987499998 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 6 -84 gpu softmax fp32 1 ------ -+++++ -conf690 3.42777980223 0 82.684999375 1.2225009374999942 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 
gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 23 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf691 4.84352798061 0 82.450834125 1.5737488124999999 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 6 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu 
pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf692 3.76776786291 0 82.559167175 1.4112492375000016 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf693 4.51358276297 0 82.44000035 1.589999474999992 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 
gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf694 3.72458652593 0 82.6208338 1.3187493000000003 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf695 3.76348907779 0 82.8208328 1.0187507999999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 
-42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 21 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf696 3.69520391434 0 82.767499375 1.0987509375000002 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv perf 23 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf697 3.66560916957 0 82.67916755 1.2312486749999962 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu 
group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 6 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf698 3.77027202063 0 82.488333725 1.517499412499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 23 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf699 3.75097348493 0 82.7166668 1.174999800000002 -1 gpu conv 
fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 promise swing_level 7 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf700 2.45929083235 0 82.681665775 1.2275013375000015 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 5 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 gpu conv fp16 1 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 5 -68 gpu batchnorm fp16 1 -69 
gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv samp 33 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 25 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf701 3.23452197803 0 82.6866664 1.2200004000000106 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv perf 21 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf702 3.78589104303 0 82.468333725 1.5474994125000094 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu 
batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf703 3.67105883538 0 82.655833575 1.266249637499996 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv fp16 1 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf704 3.68966627876 0 82.65166675 1.272499874999994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 
-30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 4 -84 gpu softmax fp32 1 ------ -+++++ -conf705 3.76017408275 0 82.621666375 1.3175004374999943 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf706 3.24530593811 0 82.58416615 1.3737507749999978 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 
gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf707 2.8309510337 0 82.354167125 1.7187493124999946 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 24 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 
gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf708 3.65387014178 0 82.29333365 1.8099995250000092 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv samp 35 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf709 3.71442465807 0 82.654166575 1.2687501374999925 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 5 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu 
fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf710 3.14961884209 0 82.7824993 1.0762510500000033 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 gpu conv fp16 1 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 22 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 7 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf711 2.83638434085 0 82.5541664 1.4187504000000004 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise 
swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 5 -84 gpu softmax fp32 1 ------ -+++++ -conf712 2.91601847724 0 82.573332825 1.3900007625000086 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf713 3.64186373922 0 82.65666695 1.2649995749999974 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu 
batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf714 2.81202046932 0 82.533333575 1.4499996374999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise 
swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf715 3.76108652872 0 82.46333325 1.5550001249999923 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 gpu conv fp16 1 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf716 3.65879322305 0 82.645832475 1.281251287499991 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 
-64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf717 3.70045394085 0 82.521666475 1.4675002874999947 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 6 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf718 3.72520627099 0 82.66583245 1.2512513250000055 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 
-44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 4 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 3 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf719 3.24756557937 0 82.614999375 1.3275009375000053 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 5 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf720 3.30885003192 0 82.649167625 1.276248562499994 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 
gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 7 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 5 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf721 4.87113873596 0 82.313334075 1.7799988874999997 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 promise swing_level 4 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv fp16 1 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 3 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf722 2.99807106899 0 82.5941672 1.3587491999999983 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu 
fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 4 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 6 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 24 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 gpu conv perf 28 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf723 2.72823552171 0 82.50916655 1.486250174999995 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 gpu conv perf 23 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 
gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 7 -84 gpu softmax fp32 1 ------ -+++++ -conf724 2.80778843881 0 82.6516676 1.2724986000000058 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 gpu conv fp16 1 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 -52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 gpu conv perf 21 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 4 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf725 3.78397139373 0 82.5358331 1.4462503499999926 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp16 1 -16 gpu group_conv fp16 1 -17 gpu batchnorm fp16 1 -18 gpu relu fp16 1 -19 gpu conv perf 25 -20 gpu batchnorm fp16 1 -21 gpu relu fp16 1 -22 gpu group_conv fp16 1 -23 gpu batchnorm fp16 1 -24 gpu relu fp16 1 -25 promise swing_level 3 -26 gpu batchnorm fp16 1 -27 gpu relu fp16 1 -28 gpu group_conv fp16 1 -29 gpu batchnorm fp16 1 -30 gpu relu fp16 1 -31 gpu conv fp16 1 -32 gpu batchnorm fp16 1 -33 gpu relu fp16 1 -34 gpu group_conv fp16 1 -35 gpu batchnorm fp16 1 -36 gpu relu fp16 1 -37 promise swing_level 5 -38 gpu batchnorm fp16 1 -39 gpu relu fp16 1 -40 gpu group_conv fp16 1 -41 gpu batchnorm fp16 1 -42 gpu relu fp16 1 -43 gpu conv fp16 1 -44 gpu batchnorm fp16 1 -45 gpu relu fp16 1 -46 gpu group_conv fp16 1 -47 gpu batchnorm fp16 1 -48 gpu relu fp16 1 -49 gpu conv perf 25 -50 gpu batchnorm fp16 1 -51 gpu relu fp16 1 
-52 gpu group_conv fp16 1 -53 gpu batchnorm fp16 1 -54 gpu relu fp16 1 -55 promise swing_level 6 -56 gpu batchnorm fp16 1 -57 gpu relu fp16 1 -58 gpu group_conv fp16 1 -59 gpu batchnorm fp16 1 -60 gpu relu fp16 1 -61 promise swing_level 3 -62 gpu batchnorm fp16 1 -63 gpu relu fp16 1 -64 gpu group_conv fp16 1 -65 gpu batchnorm fp16 1 -66 gpu relu fp16 1 -67 promise swing_level 3 -68 gpu batchnorm fp16 1 -69 gpu relu fp16 1 -70 gpu group_conv fp16 1 -71 gpu batchnorm fp16 1 -72 gpu relu fp16 1 -73 promise swing_level 7 -74 gpu batchnorm fp16 1 -75 gpu relu fp16 1 -76 gpu group_conv fp16 1 -77 gpu batchnorm fp16 1 -78 gpu relu fp16 1 -79 promise swing_level 5 -80 gpu batchnorm fp16 1 -81 gpu relu fp16 1 -82 gpu pool_mean fp16 1 -83 promise swing_level 3 -84 gpu softmax fp32 1 ------ -+++++ -conf726 3.54172278638 0 82.500833175 1.4987502375000048 -1 gpu conv fp16 1 -2 gpu batchnorm fp16 1 -3 gpu relu fp16 1 -4 gpu group_conv fp16 1 -5 gpu batchnorm fp16 1 -6 gpu relu fp16 1 -7 gpu conv fp16 1 -8 gpu batchnorm fp16 1 -9 gpu relu fp16 1 -10 gpu group_conv fp16 1 -11 gpu batchnorm fp16 1 -12 gpu relu fp16 1 -13 gpu conv fp16 1 -14 gpu batchnorm fp16 1 -15 gpu relu fp1
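All of the blocks deleted above follow the same HPVM approximation-tuner layout: `+++++` opens a configuration; a header line names it and lists four numbers (a speedup factor over the fp32 baseline, an energy column that is always 0 in these files, the measured accuracy, and an accuracy-loss figure); the numbered lines that follow bind each layer to a compute target (`gpu` or `promise`) plus an approximation knob (`fp32`/`fp16` precision, `perf`- or `samp`-approximated convolution, or a PROMISE `swing_level`); and `-----` closes the block. Below is a minimal parsing sketch in Python, with field names guessed from the data rather than taken from any HPVM API.

    from dataclasses import dataclass, field

    @dataclass
    class TunerConfig:
        name: str
        speedup: float
        energy: float            # third header column; always 0 in these files
        accuracy: float
        accuracy_loss: float
        layers: dict = field(default_factory=dict)   # layer id -> (target, knob spec)

    def parse_tuner_confs(path):
        # Parse one tuner_confs file into TunerConfig records.
        # Format inferred from the deleted data; not an official HPVM reader.
        configs, current = [], None
        with open(path) as f:
            for raw in f:
                line = raw.strip()
                if not line:
                    continue
                if line in ("+++++", "-----"):
                    current = None               # block delimiter: expect a header next
                elif current is None:
                    name, speedup, energy, acc, loss = line.split()
                    current = TunerConfig(name, float(speedup), float(energy),
                                          float(acc), float(loss))
                    configs.append(current)
                else:
                    tokens = line.split()        # e.g. "49 gpu conv perf 25"
                    current.layers[int(tokens[0])] = (tokens[1], " ".join(tokens[2:]))
        return configs

One consistency check falls out of the header columns: in every block here the last column works out to 1.5 × (83.5 − accuracy) (e.g. conf721: 1.5 × (83.5 − 82.313334075) = 1.7799988875), so the tuner appears to report a 1.5-weighted loss against an 83.5% fp32 baseline rather than the raw accuracy difference.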